//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: 
//
//
// The dx8 implementation of the shader API
//===========================================================================//

/*
DX9 todo:
-make the transforms in the older shaders match the transforms in lightmappedgeneric
-fix polyoffset for hardware that doesn't support D3DRS_SLOPESCALEDEPTHBIAS and D3DRS_DEPTHBIAS
	- code is there, I think matrix offset just needs tweaking
-fix forcehardwaresync - implement texture locking for hardware that doesn't support async query
-get the format for GetAdapterModeCount and EnumAdapterModes from somewhere (shaderapidx8.cpp, GetModeCount, GetModeInfo)
-record frame sync objects (allocframesyncobjects, free framesync objects, ForceHardwareSync)
-Need to fix ENVMAPMASKSCALE, BUMPOFFSET in lightmappedgeneric*.cpp and vertexlitgeneric*.cpp
fix this:
		// FIXME: This also depends on the vertex format and whether or not we are static lit in dx9
	#ifndef SHADERAPIDX9
		if (m_DynamicState.m_VertexShader != shader) // garymcthack
	#endif // !SHADERAPIDX9
unrelated to dx9:
mat_fullbright 1 doesn't work properly on alpha materials in testroom_standards
*/
#define DISABLE_PROTECTED_THINGS
#include "shaderapidx8.h"
#include "shaderapidx8_global.h"
#include "shadershadowdx8.h"
#include "locald3dtypes.h"												   
#include "utlvector.h"
#include "IHardwareConfigInternal.h"
#include "utlstack.h"
#include "shaderapi/ishaderutil.h"
#include "shaderapi/commandbuffer.h"
#include "shaderapidx8_global.h"
#include "materialsystem/imaterialsystem.h"
#include "materialsystem/itexture.h"
#include "imaterialinternal.h"
#include "imeshdx8.h"
#include "materialsystem/imorph.h"
#include "colorformatdx8.h"
#include "texturedx8.h"
#include "textureheap.h"
#include "interface.h"
#include "utlrbtree.h"
#include "utlsymbol.h"
#include "tier1/strtools.h"
#include "recording.h"
#ifndef _X360
#include <crtmemdebug.h>
#endif
#include "vertexshaderdx8.h"
#include "filesystem.h"
#include "mathlib/mathlib.h"
#include "materialsystem/materialsystem_config.h"
#include "worldsize.h"
#include "TransitionTable.h"
#include "tier0/vcrmode.h"
#include "tier0/vprof.h"
#include "tier1/tier1.h"
#include "tier1/utlbuffer.h"
#include "vertexdecl.h"
#include "tier0/icommandline.h"
#include "IShaderSystem.h"
#include "tier1/convar.h"
#include "tier1/KeyValues.h"
#include "Color.h"
#ifdef RECORDING
#include "materialsystem/IShader.h"
#endif
#include "../stdshaders/common_hlsl_cpp_consts.h" // hack hack hack!
#include "KeyValues.h"
#include "bitmap/imageformat.h"
#include "materialsystem/idebugtextureinfo.h"
#include "tier1/utllinkedlist.h"
#include "vtf/vtf.h"
#include "datacache/idatacache.h"
#include "renderparm.h"
#include "tier2/tier2.h"
#include "materialsystem/deformations.h"
#include "bitmap/tgawriter.h"
#include "tier0/icommandline.h"
#include "togl/rendermechanism.h"  // provides GLMPRINTF/GLMPRINTSTR / GLMPRINTEXT macros which only activate if GLMDEBUG is nonzero and POSIX is defined.

#if defined( _X360 )
#include "xbox/xbox_console.h"
#include "xbox/xbox_win32stubs.h"
#include "xbox/xbox_launch.h"
#endif
#include "tier0/tslist.h"
#ifndef _X360
#include "wmi.h"
#endif
#include "filesystem/IQueuedLoader.h"
#include "shaderdevicedx8.h"
#include "togl/rendermechanism.h"

// Define this if you want to use a stubbed d3d.
//#define STUBD3D

#ifdef STUBD3D
#include "stubd3ddevice.h"
#endif

#include "winutils.h"

// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"

#if defined( OSX )
	typedef unsigned int DWORD;
	typedef DWORD* LPDWORD;
#endif

#ifdef _WIN32
#pragma warning (disable:4189)
#endif

ConVar mat_texture_limit( "mat_texture_limit", "-1", FCVAR_NEVER_AS_STRING, 
	"If this value is not -1, the material system will limit the amount of texture memory it uses in a frame."
	" Useful for identifying performance cliffs. The value is in kilobytes." );

ConVar mat_frame_sync_enable( "mat_frame_sync_enable", "1", FCVAR_CHEAT );
ConVar mat_frame_sync_force_texture( "mat_frame_sync_force_texture", "0", FCVAR_CHEAT, "Force frame syncing to lock a managed texture." );


#if defined( _X360 )
ConVar mat_texturecachesize( "mat_texturecachesize", "176" );
ConVar mat_force_flush_texturecache( "mat_force_flush_texturecache", "0" );
#endif

extern ConVar mat_debugalttab;

#define ALLOW_SMP_ACCESS 0

#if ALLOW_SMP_ACCESS
static ConVar mat_use_smp( "mat_use_smp", "0" );
#endif

// Convars for driving PIX (not all hooked up yet...JasonM)
static ConVar r_pix_start( "r_pix_start", "0" );
static ConVar r_pix_recordframes( "r_pix_recordframes", "0" );


#define D3DDeviceWrapper IDirect3DDevice9

//-----------------------------------------------------------------------------
// Some important enumerations
//-----------------------------------------------------------------------------
enum
{
	MAX_VERTEX_TEXTURE_COUNT = 4,
};


//-----------------------------------------------------------------------------
// These board states change with high frequency; are not shadowed
//-----------------------------------------------------------------------------
struct TextureStageState_t
{
	D3DTEXTURETRANSFORMFLAGS m_TextureTransformFlags;
	float					m_BumpEnvMat00;
	float					m_BumpEnvMat01;
	float					m_BumpEnvMat10;
	float					m_BumpEnvMat11;
};

struct SamplerState_t
{
	ShaderAPITextureHandle_t	m_BoundTexture;
	D3DTEXTUREADDRESS			m_UTexWrap;
	D3DTEXTUREADDRESS			m_VTexWrap;
	D3DTEXTUREADDRESS			m_WTexWrap;
	D3DTEXTUREFILTERTYPE		m_MagFilter;
	D3DTEXTUREFILTERTYPE		m_MinFilter;
	D3DTEXTUREFILTERTYPE		m_MipFilter;
	int							m_FinestMipmapLevel;
	float						m_LodBias;
	int							m_nAnisotropicLevel;
	bool						m_TextureEnable;
	bool						m_SRGBReadEnable;
};


//-----------------------------------------------------------------------------
// State related to vertex textures
//-----------------------------------------------------------------------------
struct VertexTextureState_t
{
	ShaderAPITextureHandle_t	m_BoundTexture;
	D3DTEXTUREADDRESS			m_UTexWrap;
	D3DTEXTUREADDRESS			m_VTexWrap;
	D3DTEXTUREFILTERTYPE		m_MagFilter;
	D3DTEXTUREFILTERTYPE		m_MinFilter;
	D3DTEXTUREFILTERTYPE		m_MipFilter;
};


enum TransformType_t
{
	TRANSFORM_IS_IDENTITY = 0,
	TRANSFORM_IS_CAMERA_TO_WORLD,
	TRANSFORM_IS_GENERAL,
};

enum TransformDirtyBits_t
{
	STATE_CHANGED_VERTEX_SHADER = 0x1,
	STATE_CHANGED_FIXED_FUNCTION = 0x2,
	STATE_CHANGED = 0x3
};

enum
{
#if !defined( _X360 )
	MAX_NUM_RENDERSTATES = ( D3DRS_BLENDOPALPHA+1 ),
#else
	MAX_NUM_RENDERSTATES = D3DRS_MAX,                 
#endif
//	MORPH_TARGET_FACTOR_COUNT = VERTEX_SHADER_MORPH_TARGET_FACTOR_COUNT * 4,
};

struct DynamicState_t
{
	// Constant color
	unsigned int	m_ConstantColor;

	// Normalize normals?
	bool			m_NormalizeNormals;

	// Viewport state
	D3DVIEWPORT9	m_Viewport;

	// Transform state
	D3DXMATRIX		m_Transform[NUM_MATRIX_MODES];
	unsigned char	m_TransformType[NUM_MATRIX_MODES];
	unsigned char	m_TransformChanged[NUM_MATRIX_MODES];

	// Ambient light color
	D3DCOLOR		m_Ambient;
	D3DLIGHT		m_Lights[MAX_NUM_LIGHTS];
	LightDesc_t		m_LightDescs[MAX_NUM_LIGHTS];
	bool			m_LightEnable[MAX_NUM_LIGHTS];
	Vector4D		m_AmbientLightCube[6];
	unsigned char	m_LightChanged[MAX_NUM_LIGHTS];
	unsigned char	m_LightEnableChanged[MAX_NUM_LIGHTS];
	VertexShaderLightTypes_t	m_LightType[MAX_NUM_LIGHTS];
	Vector			m_vLightingOrigin;
	int				m_NumLights;

	// Shade mode
	D3DSHADEMODE	m_ShadeMode;

	// Clear color
	D3DCOLOR		m_ClearColor;

	// Fog
	D3DCOLOR		m_FogColor;
	float			m_PixelFogColor[4];
	bool			m_bFogGammaCorrectionDisabled;
	bool			m_FogEnable;
	MaterialFogMode_t	m_SceneFog;
	D3DFOGMODE		m_FogMode;
	float			m_FogStart;
	float			m_FogEnd;
	float			m_FogZ;
	float			m_FogMaxDensity;

	float			m_HeightClipZ;
	MaterialHeightClipMode_t m_HeightClipMode;	
	
	// user clip planes
	int				m_UserClipPlaneEnabled;
	int				m_UserClipPlaneChanged;
	D3DXPLANE		m_UserClipPlaneWorld[MAXUSERCLIPPLANES];
	D3DXPLANE		m_UserClipPlaneProj[MAXUSERCLIPPLANES];
	bool			m_UserClipLastUpdatedUsingFixedFunction;

	bool			m_FastClipEnabled;
	bool			m_bFastClipPlaneChanged;
	D3DXPLANE		m_FastClipPlane;

	// Used when overriding the user clip plane
	bool			m_bUserClipTransformOverride;
	D3DXMATRIX		m_UserClipTransform;

	// Cull mode
	D3DCULL			m_DesiredCullMode;
	D3DCULL			m_CullMode;
	bool			m_bCullEnabled;
	
	// Skinning
	D3DVERTEXBLENDFLAGS	m_VertexBlend;
	int					m_NumBones;

	// Pixel and vertex shader constants...
	Vector4D*		m_pVectorVertexShaderConstant;
	BOOL*			m_pBooleanVertexShaderConstant;
	IntVector4D*	m_pIntegerVertexShaderConstant;
	Vector4D*		m_pVectorPixelShaderConstant;
	BOOL*			m_pBooleanPixelShaderConstant;
	IntVector4D*	m_pIntegerPixelShaderConstant;

	// Texture stage state
	TextureStageState_t m_TextureStage[MAX_TEXTURE_STAGES];
	SamplerState_t m_SamplerState[MAX_SAMPLERS];

	// Vertex texture stage state
	VertexTextureState_t m_VertexTextureState[MAX_VERTEX_TEXTURE_COUNT];

	DWORD m_RenderState[MAX_NUM_RENDERSTATES];

	RECT  m_ScissorRect;

	IDirect3DVertexDeclaration9 *m_pVertexDecl;

	bool	m_bSRGBWritesEnabled;
	bool	m_bHWMorphingEnabled;

	float	m_DestAlphaDepthRange; //Dest alpha writes compress the depth to get better results. This holds the default setting that can be overriden with r_destalpharange

#if defined( _X360 )
	int		m_iVertexShaderGPRAllocation; //only need to track vertex shader
	bool	m_bBuffer2Frames;
#endif
	
	DynamicState_t() = default;

private:
	DynamicState_t( DynamicState_t const& );
};

//-----------------------------------------------------------------------------
// Method to queue up dirty dynamic state change calls
//-----------------------------------------------------------------------------
typedef void (*StateCommitFunc_t)( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce );
static void CommitSetViewports( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce );

// NOTE: It's slightly memory inefficient, and definitely not typesafe,
// to put all commit funcs into the same table (vs, ff, per-draw, per-pass),
// but it makes the code a heck of a lot simpler and smaller.
enum CommitFunc_t
{
	COMMIT_FUNC_CommitVertexTextures = 0,
	COMMIT_FUNC_CommitFlexWeights,
	COMMIT_FUNC_CommitSetScissorRect,
	COMMIT_FUNC_CommitSetViewports,

#if defined( _X360 )
	COMMIT_FUNC_CommitShaderGPRs,
#endif

	COMMIT_FUNC_COUNT,
	COMMIT_FUNC_BYTE_COUNT = ( COMMIT_FUNC_COUNT + 0x7 ) >> 3,
};

enum CommitFuncType_t
{
	COMMIT_PER_DRAW = 0,
	COMMIT_PER_PASS,

	COMMIT_FUNC_TYPE_COUNT,
};

enum CommitShaderType_t
{
	COMMIT_FIXED_FUNCTION = 0,
	COMMIT_VERTEX_SHADER,
	COMMIT_ALWAYS,

	COMMIT_SHADER_TYPE_COUNT,
};


#define ADD_COMMIT_FUNC( _func, _shader, _func_name )	\
	if ( !IsCommitFuncInUse( _func, _shader, COMMIT_FUNC_ ## _func_name ) )	\
	{																		\
		AddCommitFunc( _func, _shader, _func_name );						\
		MarkCommitFuncInUse( _func, _shader, COMMIT_FUNC_ ## _func_name );	\
	}

#define ADD_RENDERSTATE_FUNC( _func, _shader, _func_name, _state, _val )	\
	if ( m_bResettingRenderState || (m_DesiredState._state != _val) )	\
	{																		\
		m_DesiredState._state = _val;									\
		ADD_COMMIT_FUNC( _func, _shader, _func_name )						\
	}

#define ADD_VERTEX_TEXTURE_FUNC( _func, _shader, _func_name, _stage, _state, _val )	\
	Assert( ( int )_stage < MAX_VERTEX_TEXTURE_COUNT );							\
	if ( m_bResettingRenderState || (m_DesiredState.m_VertexTextureState[_stage]._state != _val) )	\
	{																		\
		m_DesiredState.m_VertexTextureState[_stage]._state = _val;		\
		ADD_COMMIT_FUNC( _func, _shader, _func_name )						\
	}


//-----------------------------------------------------------------------------
// Check render state support at compile time instead of runtime
//-----------------------------------------------------------------------------
#define SetSupportedRenderState( _state, _val )										\
	{																				\
		if( _state != D3DRS_NOTSUPPORTED )											\
		{																			\
			SetRenderState( _state, _val, false );									\
		}																			\
	}

#define SetSupportedRenderStateForce( _state, _val )								\
	{																				\
		if( _state != D3DRS_NOTSUPPORTED )											\
		{																			\
			SetRenderStateForce( _state, _val );									\
		}																			\
	}

//-----------------------------------------------------------------------------
// Allocated textures
//-----------------------------------------------------------------------------
struct Texture_t
{
	Texture_t()
	{
		m_Flags = 0;
		m_Count = 1;
		m_CountIndex = 0;
		m_nTimesBoundMax = 0;
		m_nTimesBoundThisFrame = 0;
		m_pTexture = NULL;
		m_ppTexture = NULL;
		m_ImageFormat = IMAGE_FORMAT_RGBA8888;
		m_pTextureGroupCounterGlobal = NULL;
		m_pTextureGroupCounterFrame = NULL;
		m_FinestMipmapLevel = 0;
		m_LodBias = 0.0f;
	}

	// FIXME: Compress this info
	D3DTEXTUREADDRESS		m_UTexWrap;
	D3DTEXTUREADDRESS		m_VTexWrap;
	D3DTEXTUREADDRESS		m_WTexWrap;
	D3DTEXTUREFILTERTYPE	m_MagFilter;
	D3DTEXTUREFILTERTYPE	m_MinFilter;
	D3DTEXTUREFILTERTYPE	m_MipFilter;
	int						m_FinestMipmapLevel;
	float					m_LodBias;

	unsigned char			m_NumLevels;
	unsigned char			m_SwitchNeeded;	// Do we need to advance the current copy?
	unsigned char			m_NumCopies;	// copies are used to optimize procedural textures
	unsigned char			m_CurrentCopy;	// the current copy we're using...

	int						m_CreationFlags;

	CUtlSymbol				m_DebugName;
	CUtlSymbol				m_TextureGroupName;
	int						*m_pTextureGroupCounterGlobal;	// Global counter for this texture's group.
	int						*m_pTextureGroupCounterFrame;	// Per-frame global counter for this texture's group.

	// stats stuff
	int						m_SizeBytes;
	int						m_SizeTexels;
	int						m_LastBoundFrame;
	int						m_nTimesBoundMax;
	int						m_nTimesBoundThisFrame;

	enum Flags_t
	{
		IS_ALLOCATED             = 0x0001,
		IS_DEPTH_STENCIL         = 0x0002,
		IS_DEPTH_STENCIL_TEXTURE = 0x0004,	// depth stencil texture, not surface
		IS_RENDERABLE            = ( IS_DEPTH_STENCIL | IS_ALLOCATED ),
		IS_LOCKABLE				 = 0x0008,
		IS_FINALIZED             = 0x0010,	// 360: completed async hi-res load
		IS_FAILED                = 0x0020,	// 360: failed during load
		CAN_CONVERT_FORMAT       = 0x0040,	// 360: allow format conversion
		IS_LINEAR                = 0x0080,	// 360: unswizzled linear format
		IS_RENDER_TARGET         = 0x0100,	// 360: marks a render target texture source
		IS_RENDER_TARGET_SURFACE = 0x0200,	// 360: marks a render target surface target
		IS_VERTEX_TEXTURE		 = 0x0800,
	};

	short					m_Width;
	short					m_Height;
	short					m_Depth;
	unsigned short			m_Flags;

	typedef IDirect3DBaseTexture	*IDirect3DBaseTexturePtr;
	typedef IDirect3DBaseTexture	**IDirect3DBaseTexturePtrPtr;
	typedef IDirect3DSurface		*IDirect3DSurfacePtr;

	IDirect3DBaseTexturePtr GetTexture( void )
	{
		Assert( m_NumCopies == 1 );
		Assert( !( m_Flags & IS_DEPTH_STENCIL ) );
		return m_pTexture;
	}
	IDirect3DBaseTexturePtr GetTexture( int copy )
	{
		Assert( m_NumCopies > 1 );
		Assert( !( m_Flags & IS_DEPTH_STENCIL ) );
		return m_ppTexture[copy];
	}
	IDirect3DBaseTexturePtrPtr &GetTextureArray( void )
	{
		Assert( m_NumCopies > 1 );
		Assert( !( m_Flags & IS_DEPTH_STENCIL ) );
		return m_ppTexture;
	}

	IDirect3DSurfacePtr &GetDepthStencilSurface( void )
	{
		Assert( m_NumCopies == 1 );
		Assert( (m_Flags & IS_DEPTH_STENCIL) );
		return m_pDepthStencilSurface;
	}

	IDirect3DSurfacePtr &GetRenderTargetSurface( bool bSRGB )
	{
		Assert( m_NumCopies == 1 );
		Assert( m_Flags & IS_RENDER_TARGET_SURFACE );
		return m_pRenderTargetSurface[bSRGB];
	}

	void SetTexture( IDirect3DBaseTexturePtr pPtr )
	{
		m_pTexture = pPtr;
	}
	void SetTexture( int copy, IDirect3DBaseTexturePtr pPtr )
	{
		m_ppTexture[copy] = pPtr;
	}

	int GetMemUsage() const
	{
		return m_SizeBytes;
	}

	int GetWidth() const
	{
		return ( int )m_Width;
	}

	int GetHeight() const
	{
		return ( int )m_Height;
	}

	int GetDepth() const
	{
		return ( int )m_Depth;
	}

	int GetLodClamp() const
	{
		return m_FinestMipmapLevel;
	}

	void SetImageFormat( ImageFormat format )
	{
		m_ImageFormat = format;
	}
	ImageFormat GetImageFormat() const
	{
		return m_ImageFormat;
	}

private:
	union
	{
		IDirect3DBaseTexture	*m_pTexture;				// used when there's one copy
		IDirect3DBaseTexture	**m_ppTexture;				// used when there are more than one copies
		IDirect3DSurface		*m_pDepthStencilSurface;	// used when there's one depth stencil surface
		IDirect3DSurface		*m_pRenderTargetSurface[2];
	};

	ImageFormat m_ImageFormat;

public:
	short m_Count;
	short m_CountIndex;

	short GetCount() const
	{
		return m_Count;
	}
};

#define MAX_DEFORMATION_PARAMETERS 16
#define DEFORMATION_STACK_DEPTH 10

struct Deformation_t
{
	int m_nDeformationType;
	int m_nNumParameters;
	float m_flDeformationParameters[MAX_DEFORMATION_PARAMETERS];
};


//-----------------------------------------------------------------------------
// The DX8 implementation of the shader API
//-----------------------------------------------------------------------------
class CShaderAPIDx8 : public CShaderDeviceDx8, public IShaderAPIDX8, public IDebugTextureInfo
{
	typedef CShaderDeviceDx8 BaseClass;

public:
	// constructor, destructor
	CShaderAPIDx8( );
	virtual ~CShaderAPIDx8();

	// Methods of IShaderAPI
public:
	virtual void SetViewports( int nCount, const ShaderViewport_t* pViewports );
	virtual int  GetViewports( ShaderViewport_t* pViewports, int nMax ) const;
	virtual void ClearBuffers( bool bClearColor, bool bClearDepth, bool bClearStencil, int renderTargetWidth, int renderTargetHeight );
	virtual void ClearColor3ub( unsigned char r, unsigned char g, unsigned char b );
	virtual void ClearColor4ub( unsigned char r, unsigned char g, unsigned char b, unsigned char a );
	virtual void BindVertexShader( VertexShaderHandle_t hVertexShader );
	virtual void BindGeometryShader( GeometryShaderHandle_t hGeometryShader );
	virtual void BindPixelShader( PixelShaderHandle_t hPixelShader );
	virtual void SetRasterState( const ShaderRasterState_t& state );
	virtual void SetFlexWeights( int nFirstWeight, int nCount, const MorphWeight_t* pWeights );

	// Methods of IShaderDynamicAPI
public:
	virtual void GetBackBufferDimensions( int &nWidth, int &nHeight ) const
	{
		// Chain to the device
		BaseClass::GetBackBufferDimensions( nWidth, nHeight );
	}
	virtual void MarkUnusedVertexFields( unsigned int nFlags, int nTexCoordCount, bool *pUnusedTexCoords );

public:
	// Methods of CShaderAPIBase
	virtual bool OnDeviceInit();
	virtual void OnDeviceShutdown();
	virtual void ReleaseShaderObjects();
	virtual void RestoreShaderObjects();
	virtual void BeginPIXEvent( unsigned long color, const char *szName );
	virtual void EndPIXEvent();
	virtual void AdvancePIXFrame();

public:
	// Methods of IShaderAPIDX8
	virtual void QueueResetRenderState();

	//
	// Abandon all hope ye who pass below this line which hasn't been ported.
	//

	// Sets the mode...
	bool SetMode( void* VD3DHWND, int nAdapter, const ShaderDeviceInfo_t &info );

	// Change the video mode after it's already been set.
	void ChangeVideoMode( const ShaderDeviceInfo_t &info );

	// Sets the default render state
	void SetDefaultState();

	// Methods to ask about particular state snapshots
	virtual bool IsTranslucent( StateSnapshot_t id ) const;
	virtual bool IsAlphaTested( StateSnapshot_t id ) const;
	virtual bool UsesVertexAndPixelShaders( StateSnapshot_t id ) const;
	virtual int CompareSnapshots( StateSnapshot_t snapshot0, StateSnapshot_t snapshot1 );

	// Computes the vertex format for a particular set of snapshot ids
	VertexFormat_t ComputeVertexFormat( int num, StateSnapshot_t* pIds ) const;
	VertexFormat_t ComputeVertexUsage( int num, StateSnapshot_t* pIds ) const;

	// What fields in the morph do we actually use?
	virtual MorphFormat_t ComputeMorphFormat( int numSnapshots, StateSnapshot_t* pIds ) const;

	// Uses a state snapshot
	void UseSnapshot( StateSnapshot_t snapshot );

	// Color state
	void Color3f( float r, float g, float b );
	void Color4f( float r, float g, float b, float a );
	void Color3fv( float const* c );
	void Color4fv( float const* c );

	void Color3ub( unsigned char r, unsigned char g, unsigned char b );
	void Color3ubv( unsigned char const* pColor );
	void Color4ub( unsigned char r, unsigned char g, unsigned char b, unsigned char a );
	void Color4ubv( unsigned char const* pColor );

	// Set the number of bone weights
	virtual void SetNumBoneWeights( int numBones );
	virtual void EnableHWMorphing( bool bEnable );

	// Sets the vertex and pixel shaders
	virtual void SetVertexShaderIndex( int vshIndex = -1 );
	virtual void SetPixelShaderIndex( int pshIndex = 0 );

	// Matrix state
	void MatrixMode( MaterialMatrixMode_t matrixMode );
	void PushMatrix();
	void PopMatrix();
	void LoadMatrix( float *m );
	void LoadBoneMatrix( int boneIndex, const float *m );
	void MultMatrix( float *m );
	void MultMatrixLocal( float *m );
	void GetMatrix( MaterialMatrixMode_t matrixMode, float *dst );
	void LoadIdentity( void );
	void LoadCameraToWorld( void );
	void Ortho( double left, double top, double right, double bottom, double zNear, double zFar );
	void PerspectiveX( double fovx, double aspect, double zNear, double zFar );
	void PerspectiveOffCenterX( double fovx, double aspect, double zNear, double zFar, double bottom, double top, double left, double right );
	void PickMatrix( int x, int y, int width, int height );
	void Rotate( float angle, float x, float y, float z );
	void Translate( float x, float y, float z );
	void Scale( float x, float y, float z );
	void ScaleXY( float x, float y );

	// Binds a particular material to render with
	void Bind( IMaterial* pMaterial );
	IMaterialInternal* GetBoundMaterial();

	// Level of anisotropic filtering
	virtual void SetAnisotropicLevel( int nAnisotropyLevel );

	virtual void	SyncToken( const char *pToken );

	// Cull mode
	void CullMode( MaterialCullMode_t cullMode );

	// Force writes only when z matches. . . useful for stenciling things out
	// by rendering the desired Z values ahead of time.
	void ForceDepthFuncEquals( bool bEnable );

	// Turns off Z buffering
	void OverrideDepthEnable( bool bEnable, bool bDepthEnable );

	void OverrideAlphaWriteEnable( bool bOverrideEnable, bool bAlphaWriteEnable );
	void OverrideColorWriteEnable( bool bOverrideEnable, bool bColorWriteEnable );

	void SetHeightClipZ( float z );
	void SetHeightClipMode( enum MaterialHeightClipMode_t heightClipMode );

	void SetClipPlane( int index, const float *pPlane );
	void EnableClipPlane( int index, bool bEnable );
	
	void SetFastClipPlane(const float *pPlane);
	void EnableFastClip(bool bEnable);
	
	// The shade mode
	void ShadeMode( ShaderShadeMode_t mode );

	// Vertex blend state
	void SetVertexBlendState( int numBones );

	// Gets the dynamic mesh
	IMesh* GetDynamicMesh( IMaterial* pMaterial, int nHWSkinBoneCount, bool buffered,
		IMesh* pVertexOverride, IMesh* pIndexOverride );
	IMesh* GetDynamicMeshEx( IMaterial* pMaterial, VertexFormat_t vertexFormat, int nHWSkinBoneCount, 
		bool bBuffered, IMesh* pVertexOverride, IMesh* pIndexOverride );
	IMesh *GetFlexMesh();

	// Returns the number of vertices we can render using the dynamic mesh
	virtual void GetMaxToRender( IMesh *pMesh, bool bMaxUntilFlush, int *pMaxVerts, int *pMaxIndices );
	virtual int GetMaxVerticesToRender( IMaterial *pMaterial );
	virtual int GetMaxIndicesToRender( );

	// Draws the mesh
	void DrawMesh( CMeshBase* mesh );

	// modifies the vertex data when necessary
	void ModifyVertexData( );

	// Draws
	void BeginPass( StateSnapshot_t snapshot  );
	void RenderPass( int nPass, int nPassCount );

	// We use smaller dynamic VBs during level transitions, to free up memory
	virtual int  GetCurrentDynamicVBSize( void );
	virtual void DestroyVertexBuffers( bool bExitingLevel = false );

	void SetVertexDecl( VertexFormat_t vertexFormat, bool bHasColorMesh, bool bUsingFlex, bool bUsingMorph );

	// Sets the constant register for vertex and pixel shaders
	FORCEINLINE void SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs = 1, bool bForce = false );

	void SetVertexShaderConstant( int var, float const* pVec, int numVecs = 1, bool bForce = false );
	void SetBooleanVertexShaderConstant( int var, BOOL const* pVec, int numBools = 1, bool bForce = false );
	void SetIntegerVertexShaderConstant( int var, int const* pVec, int numIntVecs = 1, bool bForce = false );
	
	void SetPixelShaderConstant( int var, float const* pVec, int numVecs = 1, bool bForce = false );
	FORCEINLINE void SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts, bool bForce );

	void SetBooleanPixelShaderConstant( int var, BOOL const* pVec, int numBools = 1, bool bForce = false );
	void SetIntegerPixelShaderConstant( int var, int const* pVec, int numIntVecs = 1, bool bForce = false );

	void InvalidateDelayedShaderConstants( void );

	// Returns the nearest supported format
	ImageFormat GetNearestSupportedFormat( ImageFormat fmt, bool bFilteringRequired = true ) const;
	ImageFormat GetNearestRenderTargetFormat( ImageFormat format ) const;
	virtual bool DoRenderTargetsNeedSeparateDepthBuffer() const;

	// stuff that shouldn't be used from within a shader 
    void ModifyTexture( ShaderAPITextureHandle_t textureHandle );
	void BindTexture( Sampler_t sampler, ShaderAPITextureHandle_t textureHandle );
	virtual void BindVertexTexture( VertexTextureSampler_t nStage, ShaderAPITextureHandle_t textureHandle );
	void DeleteTexture( ShaderAPITextureHandle_t textureHandle );

	void WriteTextureToFile( ShaderAPITextureHandle_t hTexture, const char *szFileName );

	bool IsTexture( ShaderAPITextureHandle_t textureHandle );
	bool IsTextureResident( ShaderAPITextureHandle_t textureHandle );
	FORCEINLINE bool TextureIsAllocated( ShaderAPITextureHandle_t hTexture )
	{
		return m_Textures.IsValidIndex( hTexture ) && ( GetTexture( hTexture ).m_Flags & Texture_t::IS_ALLOCATED );
	}
	FORCEINLINE void AssertValidTextureHandle( ShaderAPITextureHandle_t textureHandle )
	{
#ifdef _DEBUG
		Assert( TextureIsAllocated( textureHandle ) );
#endif
	}

	// Lets the shader know about the full-screen texture so it can 
	virtual void SetFullScreenTextureHandle( ShaderAPITextureHandle_t h );

	virtual void SetLinearToGammaConversionTextures( ShaderAPITextureHandle_t hSRGBWriteEnabledTexture, ShaderAPITextureHandle_t hIdentityTexture );

	// Set the render target to a texID.
	// Set to SHADER_RENDERTARGET_BACKBUFFER if you want to use the regular framebuffer.
	void SetRenderTarget( ShaderAPITextureHandle_t colorTextureHandle = SHADER_RENDERTARGET_BACKBUFFER, 
		ShaderAPITextureHandle_t depthTextureHandle = SHADER_RENDERTARGET_DEPTHBUFFER );
	// Set the render target to a texID.
	// Set to SHADER_RENDERTARGET_BACKBUFFER if you want to use the regular framebuffer.
	void SetRenderTargetEx( int nRenderTargetID, ShaderAPITextureHandle_t colorTextureHandle = SHADER_RENDERTARGET_BACKBUFFER, 
		ShaderAPITextureHandle_t depthTextureHandle = SHADER_RENDERTARGET_DEPTHBUFFER );

	// These are bound to the texture, not the texture environment
	void TexMinFilter( ShaderTexFilterMode_t texFilterMode );
	void TexMagFilter( ShaderTexFilterMode_t texFilterMode );
	void TexWrap( ShaderTexCoordComponent_t coord, ShaderTexWrapMode_t wrapMode );
	void TexSetPriority( int priority );
	void TexLodClamp( int finest );
	void TexLodBias( float bias );

	ShaderAPITextureHandle_t CreateTextureHandle( void );
	void CreateTextureHandles( ShaderAPITextureHandle_t *handles, int count );

	ShaderAPITextureHandle_t CreateTexture( 
		int width, 
		int height,
		int depth,
		ImageFormat dstImageFormat, 
		int numMipLevels, 
		int numCopies, 
		int creationFlags, 
		const char *pDebugName,
		const char *pTextureGroupName );

	// Create a multi-frame texture (equivalent to calling "CreateTexture" multiple times, but more efficient)
	void CreateTextures( 
		ShaderAPITextureHandle_t *pHandles,
		int count,
		int width, 
		int height,
		int depth,
		ImageFormat dstImageFormat, 
		int numMipLevels, 
		int numCopies, 
		int flags, 
		const char *pDebugName,
		const char *pTextureGroupName );

	ShaderAPITextureHandle_t CreateDepthTexture( 
		ImageFormat renderTargetFormat, 
		int width, 
		int height, 
		const char *pDebugName,
		bool bTexture );

	void TexImage2D( 
		int level, 
		int cubeFaceID, 
		ImageFormat dstFormat, 
		int zOffset, 
		int width, 
		int height, 
		ImageFormat srcFormat, 
		bool bSrcIsTiled,
		void *imageData );

	void TexSubImage2D( 
		int level, 
		int cubeFaceID, 
		int xOffset, 
		int yOffset, 
		int zOffset, 
		int width, 
		int height,
		ImageFormat srcFormat, 
		int srcStride, 
		bool bSrcIsTiled,
		void *imageData );

	void TexImageFromVTF( IVTFTexture *pVTF, int iVTFFrame );

	bool TexLock( int level, int cubeFaceID, int xOffset, int yOffset, int width, int height, CPixelWriter& writer );
	void TexUnlock( );

	// stuff that isn't to be used from within a shader
	// what's the best way to hide this? subclassing?
	virtual void ClearBuffersObeyStencil( bool bClearColor, bool bClearDepth );
	virtual void ClearBuffersObeyStencilEx( bool bClearColor, bool bClearAlpha, bool bClearDepth );
	virtual void PerformFullScreenStencilOperation( void );
	void ReadPixels( int x, int y, int width, int height, unsigned char *data, ImageFormat dstFormat );
	virtual void ReadPixels( Rect_t *pSrcRect, Rect_t *pDstRect, unsigned char *data, ImageFormat dstFormat, int nDstStride );

	// Gets the current buffered state... (debug only)
	void GetBufferedState( BufferedState_t& state );

	// Buffered primitives
	void FlushBufferedPrimitives();
	void FlushBufferedPrimitivesInternal( );

	// Make sure we finish drawing everything that has been requested 
	void FlushHardware();
	
	// Use this to begin and end the frame
	void BeginFrame();
	void EndFrame();

	// Used to clear the transition table when we know it's become invalid.
	void ClearSnapshots();

	// Backward compat
	virtual int GetActualTextureStageCount() const;
	virtual int GetActualSamplerCount() const;
	virtual int StencilBufferBits() const;
	virtual bool IsAAEnabled() const;	// Is antialiasing being used?
	virtual bool OnAdapterSet( );
	bool m_bAdapterSet;

	void UpdateFastClipUserClipPlane( void );
	bool ReadPixelsFromFrontBuffer() const;

	// returns the current time in seconds....
	double CurrentTime() const;

	// Get the current camera position in world space.
	void GetWorldSpaceCameraPosition( float* pPos ) const;

	// Fog methods
	void FogMode( MaterialFogMode_t fogMode );
	void FogStart( float fStart );
	void FogEnd( float fEnd );
	void FogMaxDensity( float flMaxDensity );
	void SetFogZ( float fogZ );
	void GetFogDistances( float *fStart, float *fEnd, float *fFogZ );

	void SceneFogMode( MaterialFogMode_t fogMode );
	MaterialFogMode_t GetSceneFogMode( );
	MaterialFogMode_t GetPixelFogMode( );
	int GetPixelFogCombo( );//0 is either range fog, or no fog simulated with rigged range fog values. 1 is height fog
	bool ShouldUsePixelFogForMode( MaterialFogMode_t fogMode );
	void SceneFogColor3ub( unsigned char r, unsigned char g, unsigned char b );
	void GetSceneFogColor( unsigned char *rgb );
	void GetSceneFogColor( unsigned char *r, unsigned char *g, unsigned char *b );
	
	// Selection mode methods
	int  SelectionMode( bool selectionMode );
	void SelectionBuffer( unsigned int* pBuffer, int size );
	void ClearSelectionNames( );
	void LoadSelectionName( int name );
	void PushSelectionName( int name );
	void PopSelectionName();
	bool IsInSelectionMode() const;
	void RegisterSelectionHit( float minz, float maxz );
	void WriteHitRecord();

	// Binds a standard texture
	virtual void BindStandardTexture( Sampler_t sampler, StandardTextureId_t id );
	virtual void BindStandardVertexTexture( VertexTextureSampler_t sampler, StandardTextureId_t id );
	virtual void GetStandardTextureDimensions( int *pWidth, int *pHeight, StandardTextureId_t id );

	// Gets the lightmap dimensions
	virtual void GetLightmapDimensions( int *w, int *h );

	// Use this to get the mesh builder that allows us to modify vertex data
	CMeshBuilder* GetVertexModifyBuilder();

	virtual bool InFlashlightMode() const;
	virtual bool InEditorMode() const;

	// Gets the bound morph's vertex format; returns 0 if no morph is bound
	virtual MorphFormat_t GetBoundMorphFormat();

	// Helper to get at the texture state stage
	TextureStageState_t& TextureStage( int stage ) { return m_DynamicState.m_TextureStage[stage]; }
	const TextureStageState_t& TextureStage( int stage ) const { return m_DynamicState.m_TextureStage[stage]; }
	SamplerState_t& SamplerState( int nSampler ) { return m_DynamicState.m_SamplerState[nSampler]; }
	const SamplerState_t& SamplerState( int nSampler ) const { return m_DynamicState.m_SamplerState[nSampler]; }

	void SetAmbientLight( float r, float g, float b );
	void SetLight( int lightNum, const LightDesc_t& desc );
	void SetLightingOrigin( Vector vLightingOrigin );
	void DisableAllLocalLights();
	void SetAmbientLightCube( Vector4D colors[6] );
	float GetAmbientLightCubeLuminance( void );

	int GetMaxLights( void ) const;
	const LightDesc_t& GetLight( int lightNum ) const;

	void SetVertexShaderStateAmbientLightCube();
	void SetPixelShaderStateAmbientLightCube( int pshReg, bool bForceToBlack = false );

	void CopyRenderTargetToTexture( ShaderAPITextureHandle_t textureHandle );
	void CopyRenderTargetToTextureEx( ShaderAPITextureHandle_t textureHandle, int nRenderTargetID, Rect_t *pSrcRect = NULL, Rect_t *pDstRect = NULL );
	void CopyTextureToRenderTargetEx( int nRenderTargetID, ShaderAPITextureHandle_t textureHandle, Rect_t *pSrcRect = NULL, Rect_t *pDstRect = NULL );
	void CopyRenderTargetToScratchTexture( ShaderAPITextureHandle_t srcHandle, ShaderAPITextureHandle_t dstHandle, Rect_t *pSrcRect = NULL, Rect_t *pDstRect = NULL );

	virtual void LockRect( void** pOutBits, int* pOutPitch, ShaderAPITextureHandle_t texHandle, int mipmap, int x, int y, int w, int h, bool bWrite, bool bRead );
	virtual void UnlockRect( ShaderAPITextureHandle_t texHandle, int mipmap );

	virtual void CopyTextureToTexture( ShaderAPITextureHandle_t srcTex, ShaderAPITextureHandle_t dstTex );

	// Returns the cull mode (for fill rate computation)
	D3DCULL GetCullMode() const;
	void SetCullModeState( bool bEnable, D3DCULL nDesiredCullMode );
	void ApplyCullEnable( bool bEnable );

	// Alpha to coverage
	void ApplyAlphaToCoverage( bool bEnable );

#if defined( _X360 )
	void ApplySRGBReadState( int iTextureStage, bool bSRGBReadEnabled );
#endif

	// Applies Z Bias
	void ApplyZBias( const ShadowState_t& shaderState );

	// Applies texture enable
	void ApplyTextureEnable( const ShadowState_t& state, int stage );

	void ApplyFogMode( ShaderFogMode_t fogMode, bool bSRGBWritesEnabled, bool bDisableFogGammaCorrection );
	void UpdatePixelFogColorConstant( void );

	void EnabledSRGBWrite( bool bEnabled );

	// Gamma<->Linear conversions according to the video hardware we're running on
	float GammaToLinear_HardwareSpecific( float fGamma ) const;
	float LinearToGamma_HardwareSpecific( float fLinear ) const;

	// Applies alpha blending
	void ApplyAlphaBlend( bool bEnable, D3DBLEND srcBlend, D3DBLEND destBlend );

	// Applies alpha texture op
	void ApplyColorTextureStage( int stage, D3DTEXTUREOP op, int arg1, int arg2 );
	void ApplyAlphaTextureStage( int stage, D3DTEXTUREOP op, int arg1, int arg2 );

	// Sets texture stage stage + render stage state
	void SetSamplerState( int stage, D3DSAMPLERSTATETYPE state, DWORD val );
	void SetTextureStageState( int stage, D3DTEXTURESTAGESTATETYPE state, DWORD val);
	void SetRenderStateForce( D3DRENDERSTATETYPE state, DWORD val );
	void SetRenderState( D3DRENDERSTATETYPE state, DWORD val, 
						 bool bFlushBufferedPrimitivesIfChanged = false);

	// Scissor Rect
	void SetScissorRect( const int nLeft, const int nTop, const int nRight, const int nBottom, const bool bEnableScissor );
	// Can we download textures?
	virtual bool CanDownloadTextures() const;

	void ForceHardwareSync_WithManagedTexture();
	void ForceHardwareSync( void );
	void UpdateFrameSyncQuery( int queryIndex, bool bIssue );

	void EvictManagedResources();

	virtual void EvictManagedResourcesInternal();

	// Gets at a particular transform
	inline D3DXMATRIX& GetTransform( int i )
	{
		return *m_pMatrixStack[i]->GetTop();
	}

	int GetCurrentNumBones( void ) const;
	bool IsHWMorphingEnabled( ) const;
	int GetCurrentLightCombo( void ) const;				// Used for DX8 only
	void GetDX9LightState( LightState_t *state ) const;	// Used for DX9 only

	MaterialFogMode_t GetCurrentFogType( void ) const;

	void RecordString( const char *pStr );

	virtual bool IsRenderingMesh() const { return m_pRenderMesh != 0; }

	void SetTextureTransformDimension( TextureStage_t textureStage, int dimension, bool projected );
	void DisableTextureTransform( TextureStage_t textureMatrix );
	void SetBumpEnvMatrix( TextureStage_t textureStage, float m00, float m01, float m10, float m11 );

	int GetCurrentFrameCounter( void ) const
	{
		return m_CurrentFrame;
	}
 
	// Workaround hack for visualization of selection mode
	virtual void SetupSelectionModeVisualizationState();

	// Allocate and delete query objects.
	virtual ShaderAPIOcclusionQuery_t CreateOcclusionQueryObject( void );
	virtual void DestroyOcclusionQueryObject( ShaderAPIOcclusionQuery_t h );

	// Bracket drawing with begin and end so that we can get counts next frame.
	virtual void BeginOcclusionQueryDrawing( ShaderAPIOcclusionQuery_t h );
	virtual void EndOcclusionQueryDrawing( ShaderAPIOcclusionQuery_t h );

	// Get the number of pixels rendered between begin and end on an earlier frame.
	// Calling this in the same frame is a huge perf hit!
	virtual int OcclusionQuery_GetNumPixelsRendered( ShaderAPIOcclusionQuery_t h, bool bFlush );

	void SetFlashlightState( const FlashlightState_t &state, const VMatrix &worldToTexture );
	void SetFlashlightStateEx( const FlashlightState_t &state, const VMatrix &worldToTexture, ITexture *pFlashlightDepthTexture );
	const FlashlightState_t &GetFlashlightState( VMatrix &worldToTexture ) const;
	const FlashlightState_t &GetFlashlightStateEx( VMatrix &worldToTexture, ITexture **pFlashlightDepthTexture ) const;

	// Gets at the shadow state for a particular state snapshot
	virtual bool IsDepthWriteEnabled( StateSnapshot_t id ) const;

// IDebugTextureInfo implementation.

	virtual bool IsDebugTextureListFresh( int numFramesAllowed = 1 );
	virtual void EnableDebugTextureList( bool bEnable );
	virtual bool SetDebugTextureRendering( bool bEnable );
	virtual void EnableGetAllTextures( bool bEnable );
	virtual KeyValues* GetDebugTextureList();
	virtual int GetTextureMemoryUsed( TextureMemoryType eTextureMemory );

	virtual void ClearVertexAndPixelShaderRefCounts();
	virtual void PurgeUnusedVertexAndPixelShaders();

	// Called when the dx support level has changed
	virtual void DXSupportLevelChanged();

	// User clip plane override
	virtual void EnableUserClipTransformOverride( bool bEnable );
	virtual void UserClipTransform( const VMatrix &worldToProjection );

	bool UsingSoftwareVertexProcessing() const;

	// Mark all user clip planes as being dirty
	void MarkAllUserClipPlanesDirty();

	// Converts a D3DXMatrix to a VMatrix and back
	void D3DXMatrixToVMatrix( const D3DXMATRIX &in, VMatrix &out );
	void VMatrixToD3DXMatrix( const VMatrix &in, D3DXMATRIX &out );

	ITexture *GetRenderTargetEx( int nRenderTargetID );

	virtual void SetToneMappingScaleLinear( const Vector &scale );
	virtual const Vector &GetToneMappingScaleLinear( void ) const;
	float GetLightMapScaleFactor( void ) const;

	void SetFloatRenderingParameter(int parm_number, float value);

	void SetIntRenderingParameter(int parm_number, int value);
	void SetVectorRenderingParameter(int parm_number, Vector const &value);

	float GetFloatRenderingParameter(int parm_number) const;

	int GetIntRenderingParameter(int parm_number) const;

	Vector GetVectorRenderingParameter(int parm_number) const;

	// For dealing with device lost in cases where Present isn't called all the time (Hammer)
	virtual void HandleDeviceLost();

	virtual void EnableLinearColorSpaceFrameBuffer( bool bEnable );

	virtual void SetPSNearAndFarZ( int pshReg );

	// stencil methods
	void SetStencilEnable(bool onoff);
	void SetStencilFailOperation(StencilOperation_t op);
	void SetStencilZFailOperation(StencilOperation_t op);
	void SetStencilPassOperation(StencilOperation_t op);
	void SetStencilCompareFunction(StencilComparisonFunction_t cmpfn);
	void SetStencilReferenceValue(int ref);
	void SetStencilTestMask(uint32 msk);
	void SetStencilWriteMask(uint32 msk);
	void ClearStencilBufferRectangle(int xmin, int ymin, int xmax, int ymax,int value);

	virtual void GetDXLevelDefaults(uint &max_dxlevel,uint &recommended_dxlevel);

#if defined( _X360 )
	HXUIFONT OpenTrueTypeFont( const char *pFontname, int tall, int style );
	void CloseTrueTypeFont( HXUIFONT hFont );
	bool GetTrueTypeFontMetrics( HXUIFONT hFont, XUIFontMetrics *pFontMetrics, XUICharMetrics charMetrics[256] );
	// Render a sequence of characters and extract the data into a buffer
	// For each character, provide the width+height of the font texture subrect,
	// an offset to apply when rendering the glyph, and an offset into a buffer to receive the RGBA data
	bool GetTrueTypeGlyphs( HXUIFONT hFont, int numChars, wchar_t *pWch, int *pOffsetX, int *pOffsetY, int *pWidth, int *pHeight, unsigned char *pRGBA, int *pRGBAOffset );
	ShaderAPITextureHandle_t CreateRenderTargetSurface( int width, int height, ImageFormat format, const char *pDebugName, const char *pTextureGroupName );
	void PersistDisplay();
	bool PostQueuedTexture( const void *pData, int nSize, ShaderAPITextureHandle_t *pHandles, int nHandles, int nWidth, int nHeight, int nDepth, int nMips, int *pRefCount );
	void *GetD3DDevice();

	void PushVertexShaderGPRAllocation( int iVertexShaderCount = 64 );
	void PopVertexShaderGPRAllocation( void );

	void EnableVSync_360( bool bEnable );
#endif

	virtual bool OwnGPUResources( bool bEnable );

// ------------ New Vertex/Index Buffer interface ----------------------------
	void BindVertexBuffer( int streamID, IVertexBuffer *pVertexBuffer, int nOffsetInBytes, int nFirstVertex, int nVertexCount, VertexFormat_t fmt, int nRepetitions = 1 );
	void BindIndexBuffer( IIndexBuffer *pIndexBuffer, int nOffsetInBytes );
	void Draw( MaterialPrimitiveType_t primitiveType, int nFirstIndex, int nIndexCount );

	// Draw the mesh with the currently bound vertex and index buffers.
	void DrawWithVertexAndIndexBuffers( void );
// ------------ End ----------------------------

	// deformations
	virtual void PushDeformation( const DeformationBase_t *pDeformation );
	virtual void PopDeformation( );
	virtual int GetNumActiveDeformations( ) const ;


	// for shaders to set vertex shader constants. returns a packed state which can be used to set the dynamic combo
	virtual int GetPackedDeformationInformation( int nMaskOfUnderstoodDeformations,
												 float *pConstantValuesOut,
												 int nBufferSize,
												 int nMaximumDeformations,
												 int *pNumDefsOut ) const ;

	inline Texture_t &GetTexture( ShaderAPITextureHandle_t hTexture )
	{
		return m_Textures[hTexture];
	}

	// Gets the texture 
	IDirect3DBaseTexture* GetD3DTexture( ShaderAPITextureHandle_t hTexture );


	virtual bool ShouldWriteDepthToDestAlpha( void ) const;

	virtual void AcquireThreadOwnership();
	virtual void ReleaseThreadOwnership();
private:
	enum
	{
		SMALL_BACK_BUFFER_SURFACE_WIDTH = 256,
		SMALL_BACK_BUFFER_SURFACE_HEIGHT = 256,
	};

	bool m_bEnableDebugTextureList;
	bool m_bDebugGetAllTextures;
	bool m_bDebugTexturesRendering;
	KeyValues *m_pDebugTextureList;
	int m_nTextureMemoryUsedLastFrame, m_nTextureMemoryUsedTotal;
	int m_nTextureMemoryUsedPicMip1, m_nTextureMemoryUsedPicMip2;
	int m_nDebugDataExportFrame;

	FlashlightState_t m_FlashlightState;
	VMatrix m_FlashlightWorldToTexture;
	ITexture *m_pFlashlightDepthTexture;

	CShaderAPIDx8( CShaderAPIDx8 const& );

	enum
	{
		INVALID_TRANSITION_OP = 0xFFFF
	};

	// State transition table for the device is as follows:

	// Other app init causes transition from OK to OtherAppInit, during transition we must release resources
	// !Other app init causes transition from OtherAppInit to OK, during transition we must restore resources
	// Minimized or device lost or device not reset causes transition from OK to LOST_DEVICE, during transition we must release resources
	// Minimized or device lost or device not reset causes transition from OtherAppInit to LOST_DEVICE

	// !minimized AND !device lost causes transition from LOST_DEVICE to NEEDS_RESET
	// minimized or device lost causes transition from NEEDS_RESET to LOST_DEVICE

	// Successful TryDeviceReset and !Other app init causes transition from NEEDS_RESET to OK, during transition we must restore resources
	// Successful TryDeviceReset and Other app init causes transition from NEEDS_RESET to OtherAppInit

	void ExportTextureList();
	void AddBufferToTextureList( const char *pName, D3DSURFACE_DESC &desc );

	void SetupTextureGroup( ShaderAPITextureHandle_t hTexture, const char *pTextureGroupName );

	// Creates the matrix stack
	void CreateMatrixStacks();

	// Initializes the render state
	void InitRenderState( );

	// Resets all dx renderstates to dx default so that our shadows are correct.
	void ResetDXRenderState( );
	
	// Resets the render state
	void ResetRenderState( bool bFullReset = true );

	// Setup standard vertex shader constants (that don't change)
	void SetStandardVertexShaderConstants( float fOverbright );
	
	// Initializes vertex and pixel shaders
	void InitVertexAndPixelShaders();

	// Discards the vertex and index buffers
	void DiscardVertexBuffers();

	// Computes the fill rate
	void ComputeFillRate();

	// Takes a snapshot
	virtual StateSnapshot_t TakeSnapshot( );

	// Converts the clear color to be appropriate for HDR
	D3DCOLOR GetActualClearColor( D3DCOLOR clearColor );

	// We lost the device
	void OnDeviceLost();

	// Gets the matrix stack from the matrix mode
	int GetMatrixStack( MaterialMatrixMode_t mode ) const;

	// Flushes the matrix state, returns false if we don't need to
	// do any more work
	bool MatrixIsChanging( TransformType_t transform = TRANSFORM_IS_GENERAL );

	// Updates the matrix transform state
	void UpdateMatrixTransform( TransformType_t transform = TRANSFORM_IS_GENERAL );

	// Sets the vertex shader modelView state..
	// NOTE: GetProjectionMatrix should only be called from the Commit functions!
	const D3DXMATRIX &GetProjectionMatrix( void );
	void SetVertexShaderViewProj();
	void SetVertexShaderModelViewProjAndModelView();

	void SetPixelShaderFogParams( int reg );
	void SetPixelShaderFogParams( int reg, ShaderFogMode_t fogMode );

	FORCEINLINE void UpdateVertexShaderFogParams( void )
	{
		if ( g_pHardwareConfig->Caps().m_SupportsPixelShaders )
		{
			float ooFogRange = 1.0f;

			float fStart = m_VertexShaderFogParams[0];
			float fEnd = m_VertexShaderFogParams[1];

			// Check for divide by zero
			if ( fEnd != fStart )
			{
				ooFogRange = 1.0f / ( fEnd - fStart );
			}

			float fogParams[4];
			fogParams[0] = ooFogRange * fEnd;
			fogParams[1] = 1.0f;
			fogParams[2] = 1.0f - clamp( m_flFogMaxDensity, 0.0f, 1.0f ); // Max fog density

			fogParams[3] = ooFogRange;

			float vertexShaderCameraPos[4];
			vertexShaderCameraPos[0] = m_WorldSpaceCameraPositon[0];
			vertexShaderCameraPos[1] = m_WorldSpaceCameraPositon[1];
			vertexShaderCameraPos[2] = m_WorldSpaceCameraPositon[2];
			vertexShaderCameraPos[3] = m_DynamicState.m_FogZ;  // waterheight

			// cFogEndOverFogRange, cFogOne, unused, cOOFogRange
			SetVertexShaderConstant( VERTEX_SHADER_FOG_PARAMS, fogParams, 1 );

			// eyepos.x eyepos.y eyepos.z cWaterZ
			SetVertexShaderConstant( VERTEX_SHADER_CAMERA_POS, vertexShaderCameraPos );
		}
	}

	// Compute stats info for a texture
	void ComputeStatsInfo( ShaderAPITextureHandle_t hTexture, bool isCubeMap, bool isVolumeTexture );

	// For procedural textures
	void AdvanceCurrentCopy( ShaderAPITextureHandle_t hTexture );

	// Deletes a D3D texture
	void DeleteD3DTexture( ShaderAPITextureHandle_t hTexture );

	// Unbinds a texture
	void UnbindTexture( ShaderAPITextureHandle_t hTexture );

	// Releases all D3D textures
	void ReleaseAllTextures();

	// Deletes all textures
	void DeleteAllTextures();

	// Gets the currently modified texture handle
	ShaderAPITextureHandle_t GetModifyTextureHandle() const;

	// Gets the bind id
	ShaderAPITextureHandle_t GetBoundTextureBindId( Sampler_t sampler ) const;
	
	// If mat_texture_limit is enabled, then this tells us if binding the specified texture would
	// take us over the limit.
	bool WouldBeOverTextureLimit( ShaderAPITextureHandle_t hTexture );

	// Sets the texture state
	void SetTextureState( Sampler_t sampler, ShaderAPITextureHandle_t hTexture, bool force = false );

	// Grab/release the internal render targets such as the back buffer and the save game thumbnail
	void AcquireInternalRenderTargets();
	void ReleaseInternalRenderTargets();

	// create/release linear->gamma table texture lookups. Only used by hardware supporting pixel shader 2b and up
	void AcquireLinearToGammaTableTextures();
	void ReleaseLinearToGammaTableTextures();

	// Gets the texture being modified
	IDirect3DBaseTexture* GetModifyTexture();
	void SetModifyTexture( IDirect3DBaseTexture *pTex );

	// returns true if we're using texture coordinates at a given stage
	bool IsUsingTextureCoordinates( int stage, int flags ) const;

	// Returns true if the board thinks we're generating spheremap coordinates
	bool IsSpheremapRenderStateActive( int stage ) const;

	// Returns true if we're modulating constant color into the vertex color
	bool IsModulatingVertexColor() const;

	// Recomputes ambient light cube
	void RecomputeAmbientLightCube( );

	// Debugging spew
	void SpewBoardState();

	// Compute and save the world space camera position.
	void CacheWorldSpaceCameraPosition();

	// Compute and save the projection atrix with polyoffset built in if we need it.
	void CachePolyOffsetProjectionMatrix();

	// Vertex shader helper functions
	int  FindVertexShader( VertexFormat_t fmt, char const* pFileName ) const;
	int  FindPixelShader( char const* pFileName ) const;

	// Returns copies of the front and back buffers
	IDirect3DSurface* GetFrontBufferImage( ImageFormat& format );
	IDirect3DSurface* GetBackBufferImage( Rect_t *pSrcRect, Rect_t *pDstRect, ImageFormat& format );
	IDirect3DSurface* GetBackBufferImageHDR( Rect_t *pSrcRect, Rect_t *pDstRect, ImageFormat& format );

	// Copy bits from a host-memory surface
	void CopyBitsFromHostSurface( IDirect3DSurface* pSurfaceBits, 
		const Rect_t &dstRect, unsigned char *pData, ImageFormat srcFormat, ImageFormat dstFormat, int nDstStride );

	FORCEINLINE void SetTransform( D3DTRANSFORMSTATETYPE State, CONST D3DXMATRIX *pMatrix )
	{
#if !defined( _X360 )
		Dx9Device()->SetTransform( State, pMatrix );
#endif
	}

	FORCEINLINE void SetLight( DWORD Index, CONST D3DLIGHT9 *pLight )
	{
#if !defined( _X360 )
		Dx9Device()->SetLight( Index, pLight );
#endif
	}

	FORCEINLINE void LightEnable( DWORD LightIndex, bool bEnable )
	{
#if !defined( _X360 )
		Dx9Device()->LightEnable( LightIndex, bEnable );
#endif
	}


	void ExecuteCommandBuffer( uint8 *pCmdBuffer );
	void SetStandardTextureHandle( StandardTextureId_t nId, ShaderAPITextureHandle_t );

	// Methods related to queuing functions to be called per-(pMesh->Draw call) or per-pass
	void ClearAllCommitFuncs( CommitFuncType_t func, CommitShaderType_t shader );
	void CallCommitFuncs( CommitFuncType_t func, CommitShaderType_t shader, bool bForce );
	bool IsCommitFuncInUse( CommitFuncType_t func, CommitShaderType_t shader, int nFunc ) const;
	void MarkCommitFuncInUse( CommitFuncType_t func, CommitShaderType_t shader, int nFunc );
	void AddCommitFunc( CommitFuncType_t func, CommitShaderType_t shader, StateCommitFunc_t f );
	void CallCommitFuncs( CommitFuncType_t func, bool bUsingFixedFunction, bool bForce = false );

	// Commits transforms and lighting
	void CommitStateChanges();

	// Commits transforms that have to be dealt with on a per pass basis (ie. projection matrix for polyoffset)
	void CommitPerPassStateChanges( StateSnapshot_t id );

	// Need to handle fog mode on a per-pass basis
	void CommitPerPassFogMode( bool bUsingVertexAndPixelShaders );

	void CommitPerPassXboxFixups();

	// Commits user clip planes
	void CommitUserClipPlanes( bool bUsingFixedFunction );

	// Gets the user clip transform (world->view)
	D3DXMATRIX & GetUserClipTransform( );

	// transform commit
	bool VertexShaderTransformChanged( int i );
	bool FixedFunctionTransformChanged( int i );

	void UpdateVertexShaderMatrix( int iMatrix );
	void SetVertexShaderStateSkinningMatrices();
	void CommitVertexShaderTransforms();
	void CommitPerPassVertexShaderTransforms();

	void UpdateFixedFunctionMatrix( int iMatrix );
	void SetFixedFunctionStateSkinningMatrices();
	void CommitFixedFunctionTransforms();
	void CommitPerPassFixedFunctionTransforms();

	// Recomputes the fast-clip plane matrices based on the current fast-clip plane
	void CommitFastClipPlane( );

	// Computes a matrix which includes the poly offset given an initial projection matrix
	void ComputePolyOffsetMatrix( const D3DXMATRIX& matProjection, D3DXMATRIX &matProjectionOffset );

	void SetSkinningMatrices();
	
	// lighting commit
	bool VertexShaderLightingChanged( int i );
	bool VertexShaderLightingEnableChanged( int i );
	bool FixedFunctionLightingChanged( int i );
	bool FixedFunctionLightingEnableChanged( int i );
	VertexShaderLightTypes_t ComputeLightType( int i ) const;
	void SortLights( int* index );
	void CommitVertexShaderLighting();
	void CommitPixelShaderLighting( int pshReg );
	void CommitFixedFunctionLighting();

	// Gets the surface associated with a texture (refcount of surface is increased)
	IDirect3DSurface* GetTextureSurface( ShaderAPITextureHandle_t textureHandle );
	IDirect3DSurface* GetDepthTextureSurface( ShaderAPITextureHandle_t textureHandle );

	//
	// Methods related to hardware config
	//
	void SetDefaultConfigValuesForDxLevel( int dxLevelFromCaps, ShaderDeviceInfo_t &info, unsigned int nFlagsUsed );

	// Determines hardware capabilities
	bool DetermineHardwareCaps( );

	// Alpha To Coverage entrypoints and states - much of this involves vendor-dependent paths and states...
	bool CheckVendorDependentAlphaToCoverage();
	void EnableAlphaToCoverage();
	void DisableAlphaToCoverage();

	// Vendor-dependent shadow mapping detection
	void CheckVendorDependentShadowMappingSupport( bool &bSupportsShadowDepthTextures, bool &bSupportsFetch4 );

	// Override caps based on a requested dx level
	void OverrideCaps( int nForcedDXLevel );

	// Reports support for a given MSAA mode
	bool SupportsMSAAMode( int nMSAAMode );

	// Reports support for a given CSAA mode
	bool SupportsCSAAMode( int nNumSamples, int nQualityLevel );

	// Gamma correction of fog color, or not...
	D3DCOLOR ComputeGammaCorrectedFogColor( unsigned char r, unsigned char g, unsigned char b, bool bSRGBWritesEnabled );

	void SetDefaultMaterial();

	bool RestorePersistedDisplay( bool bUseFrontBuffer );

	void ClearStdTextureHandles( void );

	// debug logging
	void PrintfVA( char *fmt, va_list vargs );
	void Printf( const char *fmt, ... );	
	float Knob( char *knobname, float *setvalue = NULL );

	// "normal" back buffer and depth buffer.  Need to keep this around so that we
	// know what to set the render target to when we are done rendering to a texture.
	IDirect3DSurface	*m_pBackBufferSurface;
	IDirect3DSurface	*m_pBackBufferSurfaceSRGB;
	IDirect3DSurface	*m_pZBufferSurface;

	// Optimization for screenshots
	IDirect3DSurface	*m_pSmallBackBufferFP16TempSurface;

	ShaderAPITextureHandle_t m_hFullScreenTexture;

	ShaderAPITextureHandle_t m_hLinearToGammaTableTexture;
	ShaderAPITextureHandle_t m_hLinearToGammaTableIdentityTexture;

	//
	// State needed at the time of rendering (after snapshots have been applied)
	//

	// Interface for the D3DXMatrixStack
	ID3DXMatrixStack*	m_pMatrixStack[NUM_MATRIX_MODES];
	matrix3x4_t			m_boneMatrix[NUM_MODEL_TRANSFORMS];
	int					m_maxBoneLoaded;

	// Current matrix mode
	D3DTRANSFORMSTATETYPE m_MatrixMode;
	int m_CurrStack;

	// The current camera position in world space.
	Vector4D m_WorldSpaceCameraPositon;

	// The current projection matrix with polyoffset baked into it.
	D3DXMATRIX		m_CachedPolyOffsetProjectionMatrix;
	D3DXMATRIX		m_CachedFastClipProjectionMatrix;
	D3DXMATRIX		m_CachedFastClipPolyOffsetProjectionMatrix;

	// The texture stage state that changes frequently
	DynamicState_t	m_DynamicState;

	// The *desired* dynamic state. Most dynamic state is committed into actual hardware state
	// at either per-pass or per-material time.	This can also be used to force the hardware
	// to match the desired state after returning from alt-tab.
	DynamicState_t	m_DesiredState;

	// A list of state commit functions to run as per-draw call commit time
	unsigned char m_pCommitFlags[COMMIT_FUNC_TYPE_COUNT][COMMIT_SHADER_TYPE_COUNT][ COMMIT_FUNC_BYTE_COUNT ];
	CUtlVector< StateCommitFunc_t >	m_CommitFuncs[COMMIT_FUNC_TYPE_COUNT][COMMIT_SHADER_TYPE_COUNT];

	// Render data
	CMeshBase *m_pRenderMesh;
	int m_nDynamicVBSize;
	IMaterialInternal *m_pMaterial;

	// Shadow depth bias states
	float m_fShadowSlopeScaleDepthBias;
	float m_fShadowDepthBias;

	bool		m_bReadPixelsEnabled;

	// Render-to-texture stuff...
	bool		m_UsingTextureRenderTarget;

	int			m_ViewportMaxWidth;
	int			m_ViewportMaxHeight;

	ShaderAPITextureHandle_t m_hCachedRenderTarget;
	bool		m_bUsingSRGBRenderTarget;

	// Ambient cube map ok?
	int m_CachedAmbientLightCube;

	// The current frame
	int m_CurrentFrame;

	// The texture we're currently modifying
	ShaderAPITextureHandle_t m_ModifyTextureHandle;
	char m_ModifyTextureLockedLevel;
	unsigned char m_ModifyTextureLockedFace;

	// Stores all textures
	CUtlFixedLinkedList< Texture_t >	m_Textures;

	// Mesh builder used to modify vertex data
	CMeshBuilder m_ModifyBuilder;

	float			m_VertexShaderFogParams[2];
	float			m_flFogMaxDensity;

	// Shadow state transition table
	CTransitionTable m_TransitionTable;
	StateSnapshot_t m_nCurrentSnapshot;

	// Depth test override...
	bool	m_bOverrideMaterialIgnoreZ;
	bool 	m_bIgnoreZValue;

	// Are we in the middle of resetting the render state?
	bool	m_bResettingRenderState;

	// Can we buffer 2 frames ahead?
	bool	m_bBuffer2FramesAhead;

	// Selection name stack
	CUtlStack< int >	m_SelectionNames;
	bool				m_InSelectionMode;
	unsigned int*		m_pSelectionBufferEnd;
	unsigned int*		m_pSelectionBuffer;
	unsigned int*		m_pCurrSelectionRecord;
	float				m_SelectionMinZ;
	float				m_SelectionMaxZ;
	int					m_NumHits;

	// fog
	unsigned char m_SceneFogColor[3];
	MaterialFogMode_t m_SceneFogMode;

	// Tone Mapping state ( w is gamma scale )
	Vector4D m_ToneMappingScale;

	Deformation_t m_DeformationStack[DEFORMATION_STACK_DEPTH];

	Deformation_t *m_pDeformationStackPtr;

	void WriteShaderConstantsToGPU();

	// rendering parameter storage
	int IntRenderingParameters[MAX_INT_RENDER_PARMS];
	float FloatRenderingParameters[MAX_FLOAT_RENDER_PARMS];
	Vector VectorRenderingParameters[MAX_VECTOR_RENDER_PARMS];

	ShaderAPITextureHandle_t m_StdTextureHandles[TEXTURE_MAX_STD_TEXTURES];
	
	// PIX instrumentation utlities...enable these with PIX_INSTRUMENTATION
	void StartPIXInstrumentation();
	void EndPIXInstrumentation();
	void SetPIXMarker( unsigned long color, const char *szName );
	void IncrementPIXError();
	bool PIXError();
	int m_nPIXErrorCount;
	int m_nPixFrame;
	bool m_bPixCapturing;

	void ComputeVertexDescription( unsigned char* pBuffer, VertexFormat_t vertexFormat, MeshDesc_t& desc ) const
	{
		return MeshMgr()->ComputeVertexDescription( pBuffer, vertexFormat, desc );
	}

	// Reports support for shadow depth texturing
	bool SupportsShadowDepthTextures( void );
	bool SupportsFetch4( void );

	void SetShadowDepthBiasFactors( float fShadowSlopeScaleDepthBias, float fShadowDepthBias );

	// Vendor-dependent depth stencil texture format
	ImageFormat GetShadowDepthTextureFormat( void );

	bool SupportsBorderColor( void ) const;
	bool SupportsFetch4( void ) const;
	
	void EnableBuffer2FramesAhead( bool bEnable );

	void SetDepthFeatheringPixelShaderConstant( int iConstant, float fDepthBlendScale );

	void SetDisallowAccess( bool b )
	{
		g_bShaderAccessDisallowed = b;
	}

	void EnableShaderShaderMutex( bool b )
	{
		Assert( g_ShaderMutex.GetOwnerId() == 0 );
		g_bUseShaderMutex = b;
	}

	void ShaderLock()
	{
		g_ShaderMutex.Lock();
	}

	void ShaderUnlock()
	{
		g_ShaderMutex.Unlock();
	}

	// Vendor-dependent slim texture format
	ImageFormat GetNullTextureFormat( void );

	//The idea behind a delayed constant is this.
	// Some shaders set constants based on rendering states, and some rendering states aren't updated until after a shader's already called Draw().
	//  So, for some functions that are state based, we save the constant we set and if the state changes between when it's set in the shader setup code 
	//   and when the shader is drawn, we update that constant.
	struct DelayedConstants_t
	{
		int iPixelShaderFogParams;

		void Invalidate( void )
		{
			iPixelShaderFogParams = -1;
		}
		DelayedConstants_t( void ) { this->Invalidate(); }
	};
	DelayedConstants_t m_DelayedShaderConstants;

	bool SetRenderTargetInternalXbox( ShaderAPITextureHandle_t hTexture, bool bForce = false );

#if defined( _X360 )
	CUtlStack<int> m_VertexShaderGPRAllocationStack;
#endif

	int	m_MaxVectorVertexShaderConstant;	
	int	m_MaxBooleanVertexShaderConstant;
	int	m_MaxIntegerVertexShaderConstant;
	int	m_MaxVectorPixelShaderConstant;	
	int	m_MaxBooleanPixelShaderConstant;
	int	m_MaxIntegerPixelShaderConstant;

	bool m_bGPUOwned;
	bool m_bResetRenderStateNeeded;

#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
	bool m_NullDevice;
#endif
};


//-----------------------------------------------------------------------------
// Class Factory
//-----------------------------------------------------------------------------
static CShaderAPIDx8 g_ShaderAPIDX8;
IShaderAPIDX8 *g_pShaderAPIDX8 = &g_ShaderAPIDX8;
CShaderDeviceDx8* g_pShaderDeviceDx8 = &g_ShaderAPIDX8;

// FIXME: Remove IShaderAPI + IShaderDevice; they change after SetMode
EXPOSE_SINGLE_INTERFACE_GLOBALVAR( CShaderAPIDx8, IShaderAPI, 
				SHADERAPI_INTERFACE_VERSION, g_ShaderAPIDX8 )

EXPOSE_SINGLE_INTERFACE_GLOBALVAR( CShaderAPIDx8, IShaderDevice, 
				SHADER_DEVICE_INTERFACE_VERSION, g_ShaderAPIDX8 )

EXPOSE_SINGLE_INTERFACE_GLOBALVAR( CShaderAPIDx8, IDebugTextureInfo, 
				DEBUG_TEXTURE_INFO_VERSION, g_ShaderAPIDX8 )

//-----------------------------------------------------------------------------
// Accessors for major interfaces
//-----------------------------------------------------------------------------

// Pix wants a max of 32 characters
// We'll give it the right-most substrings separated by slashes
static char s_pPIXMaterialName[32];
void PIXifyName( char *pDst, int destSize, const char *pSrc )
{
	char *pSrcWalk = (char *)pSrc;

	while ( V_strlen( pSrcWalk ) > 31 )						// While we still have too many characters
	{
		char *pTok = strpbrk( pSrcWalk, "/\\" );			// Find next token

		if ( pTok )
			pSrcWalk = pTok + 1;
		else
			break;
	}

	V_strncpy( pDst, pSrcWalk, min( 32, destSize ) );
}

static int AdjustUpdateRange( float const* pVec, void const *pOut, int numVecs, int* pSkip )
{
	int skip = 0;
	uint32* pSrc = (uint32*)pVec;
	uint32* pDst = (uint32*)pOut;
	while( numVecs && !( ( pSrc[0] ^ pDst[0] ) | ( pSrc[1] ^ pDst[1] ) | ( pSrc[2] ^ pDst[2] ) | ( pSrc[3] ^ pDst[3] ) ) )
	{
		pSrc += 4;
		pDst += 4;
		numVecs--;
		skip++;
	}
	*pSkip = skip;
	if ( !numVecs )
		return 0;

	uint32* pSrcLast = pSrc + numVecs * 4 - 4;
	uint32* pDstLast = pDst + numVecs * 4 - 4;
	while( numVecs > 1 && !( ( pSrcLast[0] ^ pDstLast[0] ) | ( pSrcLast[1] ^ pDstLast[1] ) | ( pSrcLast[2] ^ pDstLast[2] ) | ( pSrcLast[3] ^ pDstLast[3] ) ) )
	{
		pSrcLast -= 4;
		pDstLast -= 4;
		numVecs--;
	}

	return numVecs;
}

//-----------------------------------------------------------------------------
// Constructor, destructor
//-----------------------------------------------------------------------------
CShaderAPIDx8::CShaderAPIDx8() :
	m_Textures( 32 ),
	m_CurrStack( -1 ),
	m_ModifyTextureHandle( INVALID_SHADERAPI_TEXTURE_HANDLE ),
	m_pRenderMesh( 0 ),
	m_nDynamicVBSize( DYNAMIC_VERTEX_BUFFER_MEMORY ),
	m_pMaterial( NULL ),
	m_CurrentFrame( 0 ),
	m_CachedAmbientLightCube( STATE_CHANGED ),
	m_InSelectionMode( false ),
	m_SelectionMinZ( FLT_MAX ),
	m_SelectionMaxZ( FLT_MIN ),
	m_pSelectionBuffer( 0 ),
	m_pSelectionBufferEnd( 0 ),
	m_bResetRenderStateNeeded( false ),
	m_ModifyTextureLockedLevel( -1 ),
	m_nPixFrame(0),
	m_bPixCapturing(false),
	m_nPIXErrorCount(0),
	m_pBackBufferSurface( 0 ),
	m_pBackBufferSurfaceSRGB( 0 ),
	m_pZBufferSurface( 0 ),
	m_bResettingRenderState( false ),
	m_bReadPixelsEnabled( false ),
	m_ToneMappingScale( 1.0f, 1.0f, 1.0f, 1.0f ),
	m_hFullScreenTexture( INVALID_SHADERAPI_TEXTURE_HANDLE ),
	m_hLinearToGammaTableTexture( INVALID_SHADERAPI_TEXTURE_HANDLE ),
	m_hLinearToGammaTableIdentityTexture( INVALID_SHADERAPI_TEXTURE_HANDLE ),
	m_fShadowSlopeScaleDepthBias( 16.0f ),
	m_fShadowDepthBias( 0.00008f ),
	m_hCachedRenderTarget( INVALID_SHADERAPI_TEXTURE_HANDLE ),
	m_bUsingSRGBRenderTarget( false )
{
	// FIXME: Remove! Backward compat
	m_bAdapterSet = false;
	m_bBuffer2FramesAhead = false;
	m_bReadPixelsEnabled = true;

	memset( m_pMatrixStack, 0, sizeof(ID3DXMatrixStack*) * NUM_MATRIX_MODES );
	memset( &m_DynamicState, 0, sizeof(m_DynamicState) );
	//m_DynamicState.m_HeightClipMode = MATERIAL_HEIGHTCLIPMODE_DISABLE;
	m_nWindowHeight = m_nWindowWidth = 0;
	m_maxBoneLoaded = 0;

	m_bEnableDebugTextureList = 0;
	m_bDebugTexturesRendering = 0;
	m_pDebugTextureList = NULL;
	m_nTextureMemoryUsedLastFrame = 0;
	m_nTextureMemoryUsedTotal = 0;
	m_nTextureMemoryUsedPicMip1 = 0;
	m_nTextureMemoryUsedPicMip2 = 0;
	m_nDebugDataExportFrame = 0;

	m_SceneFogColor[0] = 0;
	m_SceneFogColor[1] = 0;
	m_SceneFogColor[2] = 0;
	m_SceneFogMode = MATERIAL_FOG_NONE;

	// We haven't yet detected whether we support CreateQuery or not yet.
	memset(IntRenderingParameters,0,sizeof(IntRenderingParameters));
	memset(FloatRenderingParameters,0,sizeof(FloatRenderingParameters));
	memset(VectorRenderingParameters,0,sizeof(VectorRenderingParameters));

	m_pDeformationStackPtr = m_DeformationStack + DEFORMATION_STACK_DEPTH;

	m_bGPUOwned = false;
	m_MaxVectorVertexShaderConstant = 0;
	m_MaxBooleanVertexShaderConstant = 0;
	m_MaxIntegerVertexShaderConstant = 0;
	m_MaxVectorPixelShaderConstant = 0;
	m_MaxBooleanPixelShaderConstant = 0;
	m_MaxIntegerPixelShaderConstant = 0;

	ClearStdTextureHandles();
	
	//Debugger();
#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
	m_NullDevice = !!CommandLine()->FindParm( "-nulldevice" );
#endif
}

CShaderAPIDx8::~CShaderAPIDx8()
{
	if ( m_DynamicState.m_pVectorVertexShaderConstant )
	{
		delete[] m_DynamicState.m_pVectorVertexShaderConstant;
		m_DynamicState.m_pVectorVertexShaderConstant = NULL;
	}

	if ( m_DynamicState.m_pBooleanVertexShaderConstant )
	{
		delete[] m_DynamicState.m_pBooleanVertexShaderConstant;
		m_DynamicState.m_pBooleanVertexShaderConstant = NULL;
	}

	if ( m_DynamicState.m_pIntegerVertexShaderConstant )
	{
		delete[] m_DynamicState.m_pIntegerVertexShaderConstant;
		m_DynamicState.m_pIntegerVertexShaderConstant = NULL;
	}

	if ( m_DynamicState.m_pVectorPixelShaderConstant )
	{
		delete[] m_DynamicState.m_pVectorPixelShaderConstant;
		m_DynamicState.m_pVectorPixelShaderConstant = NULL;
	}

	if ( m_DynamicState.m_pBooleanPixelShaderConstant )
	{
		delete[] m_DynamicState.m_pBooleanPixelShaderConstant;
		m_DynamicState.m_pBooleanPixelShaderConstant = NULL;
	}

	if ( m_DynamicState.m_pIntegerPixelShaderConstant )
	{
		delete[] m_DynamicState.m_pIntegerPixelShaderConstant;
		m_DynamicState.m_pIntegerPixelShaderConstant = NULL;
	}

	if ( m_pDebugTextureList )
	{
		m_pDebugTextureList->deleteThis();
		m_pDebugTextureList = NULL;
	}
}


void CShaderAPIDx8::ClearStdTextureHandles( void )
{
	for(int i = 0 ; i < ARRAYSIZE( m_StdTextureHandles ) ; i++ )
		m_StdTextureHandles[i] = INVALID_SHADERAPI_TEXTURE_HANDLE;
}


//-----------------------------------------------------------------------------
// FIXME: Remove! Backward compat.
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::OnAdapterSet()
{
	if ( !DetermineHardwareCaps( ) )
		return false;

	// Modify the caps based on requested DXlevels
	int nForcedDXLevel = CommandLine()->ParmValue( "-dxlevel", 0 );
	
	if ( nForcedDXLevel > 0 )
	{
		nForcedDXLevel = MAX( nForcedDXLevel, ABSOLUTE_MINIMUM_DXLEVEL );
	}
	

	// FIXME: Check g_pHardwareConfig->ActualCaps() for a preferred DX level
	OverrideCaps( nForcedDXLevel );

	m_bAdapterSet = true;
	return true;
}


void CShaderAPIDx8::GetDXLevelDefaults(uint &max_dxlevel,uint &recommended_dxlevel)
{
	max_dxlevel=g_pHardwareConfig->ActualCaps().m_nMaxDXSupportLevel;
	recommended_dxlevel=g_pHardwareConfig->ActualCaps().m_nDXSupportLevel;
}

//-----------------------------------------------------------------------------
// Can we download textures?
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::CanDownloadTextures() const
{
	if ( IsDeactivated() )
		return false;

	return IsActive();
}


//-----------------------------------------------------------------------------
// Grab the render targets
//-----------------------------------------------------------------------------
void CShaderAPIDx8::AcquireInternalRenderTargets()
{
	GLMPRINTF(( ">-A- CShaderAPIDx8::AcquireInternalRenderTargets... "));
	if ( mat_debugalttab.GetBool() )
	{
		Warning( "mat_debugalttab: CShaderAPIDx8::AcquireInternalRenderTargets\n" );
	}

	if ( !m_pBackBufferSurface )
	{
		Dx9Device()->GetRenderTarget( 0, &m_pBackBufferSurface );
#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
		if( !m_NullDevice )
#endif
		{
			Assert( m_pBackBufferSurface );
		}
	}

#if defined( _X360 )
	if ( !m_pBackBufferSurfaceSRGB )
	{
		// create a SRGB back buffer clone
		int backWidth, backHeight;
		ShaderAPI()->GetBackBufferDimensions( backWidth, backHeight );
		D3DFORMAT backBufferFormat = ImageLoader::ImageFormatToD3DFormat( g_pShaderDevice->GetBackBufferFormat() );
		m_pBackBufferSurfaceSRGB = g_TextureHeap.AllocRenderTargetSurface( backWidth, backHeight, (D3DFORMAT)MAKESRGBFMT( backBufferFormat ), true, 0 );
	}
#endif

#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
	if ( !m_pZBufferSurface && !m_NullDevice )
#else
	if ( !m_pZBufferSurface )
#endif
	{
		Dx9Device()->GetDepthStencilSurface( &m_pZBufferSurface );
		Assert( m_pZBufferSurface );
	}
	GLMPRINTF(( "<-A- CShaderAPIDx8::AcquireInternalRenderTargets...complete "));
}


//-----------------------------------------------------------------------------
// Release the render targets
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReleaseInternalRenderTargets( )
{
	GLMPRINTF(( ">-A- CShaderAPIDx8::ReleaseInternalRenderTargets... "));
	if( mat_debugalttab.GetBool() )
	{
		Warning( "mat_debugalttab: CShaderAPIDx8::ReleaseInternalRenderTargets\n" );
	}

	// Note: This function does not release renderable textures created elsewhere
	//       Those should be released separately via the texure manager
	if ( m_pBackBufferSurface )
	{
#if POSIX
		// dxabstract's AddRef/Release have optional args to help track usage
		int nRetVal = m_pBackBufferSurface->Release( 0, "-B  CShaderAPIDx8::ReleaseInternalRenderTargets public release color buffer");
#else
		int nRetVal = m_pBackBufferSurface->Release();
#endif
		//Assert( nRetVal == 0 );
		m_pBackBufferSurface = NULL;
	}

	if ( m_pZBufferSurface )
	{
#if POSIX
		// dxabstract's AddRef/Release have optional args to help track usage
		int nRetVal = m_pZBufferSurface->Release( 0, "-B  CShaderAPIDx8::ReleaseInternalRenderTargets public release zbuffer");
#else
		int nRetVal = m_pZBufferSurface->Release();
#endif

		//Assert( nRetVal == 0 );		//FIXME not sure why we're seeing a refcount of 3 here
		m_pZBufferSurface = NULL;
	}
	GLMPRINTF(( "<-A- CShaderAPIDx8::ReleaseInternalRenderTargets... complete"));
}

//-----------------------------------------------------------------------------
// During init, places the persisted texture back into the back buffer.
// The initial 360 fixup present will then not flash. This routine has to be
// self contained, no other shader api systems are viable during init.
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::RestorePersistedDisplay( bool bUseFrontBuffer )
{
#if defined( _X360 )
	if ( !( XboxLaunch()->GetLaunchFlags() & LF_INTERNALLAUNCH ) )
	{
		// there is no persisted screen
		return false;
	}

	OwnGPUResources( false );

	const char *strVertexShaderProgram = 
		" float4x4 matWVP : register(c0);"  
		" struct VS_IN"  
		" {" 
		" float4 ObjPos : POSITION;"
		" float2 TexCoord : TEXCOORD;"
		" };" 
		" struct VS_OUT" 
		" {" 
		" float4 ProjPos : POSITION;" 
		" float2 TexCoord : TEXCOORD;"
		" };"  
		" VS_OUT main( VS_IN In )"  
		" {"  
		" VS_OUT Out; "  
		" Out.ProjPos = mul( matWVP, In.ObjPos );"
		" Out.TexCoord = In.TexCoord;"
		" return Out;"  
		" }";

	const char *strPixelShaderProgram = 
		" struct PS_IN"
		" {"
		" float2 TexCoord : TEXCOORD;"
		" };"
		" sampler detail;"
		" float4 main( PS_IN In ) : COLOR"  
		" {"  
		" return tex2D( detail, In.TexCoord );"
		" }"; 

	D3DVERTEXELEMENT9 VertexElements[3] =
	{
		{ 0,  0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
		{ 0, 12, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
		D3DDECL_END()
	};


	IDirect3DTexture *pTexture;
	if ( bUseFrontBuffer )
	{
		Dx9Device()->GetFrontBuffer( &pTexture );
	}
	else
	{
		// 360 holds a persistent image across restarts
		Dx9Device()->GetPersistedTexture( &pTexture );
	}

	ID3DXBuffer *pErrorMsg = NULL;
	ID3DXBuffer *pShaderCode = NULL;

	HRESULT hr = D3DXCompileShader( strVertexShaderProgram, (UINT)strlen( strVertexShaderProgram ), NULL, NULL, "main", "vs_2_0", 0, &pShaderCode, &pErrorMsg, NULL );
	if ( FAILED( hr ) )
	{
		return false;
	}
	IDirect3DVertexShader9 *pVertexShader;
	Dx9Device()->CreateVertexShader( (DWORD*)pShaderCode->GetBufferPointer(), &pVertexShader );
	pShaderCode->Release();

	pErrorMsg = NULL;
	pShaderCode = NULL;
	hr = D3DXCompileShader( strPixelShaderProgram, (UINT)strlen( strPixelShaderProgram ), NULL, NULL, "main", "ps_2_0", 0, &pShaderCode, &pErrorMsg, NULL );
	if ( FAILED(hr) )
	{
		return false;
	}
	IDirect3DPixelShader9 *pPixelShader;
	Dx9Device()->CreatePixelShader( (DWORD*)pShaderCode->GetBufferPointer(), &pPixelShader );
	pShaderCode->Release();

	int w, h;
	GetBackBufferDimensions( w, h );

	// Create a vertex declaration from the element descriptions.
	IDirect3DVertexDeclaration9 *pVertexDecl;
	Dx9Device()->CreateVertexDeclaration( VertexElements, &pVertexDecl );
	XMMATRIX matWVP = XMMatrixOrthographicOffCenterLH( 0, (FLOAT)w, (FLOAT)h, 0, 0, 1 );

	ConVarRef mat_monitorgamma( "mat_monitorgamma" );
	ConVarRef mat_monitorgamma_tv_range_min( "mat_monitorgamma_tv_range_min" );
	ConVarRef mat_monitorgamma_tv_range_max( "mat_monitorgamma_tv_range_max" );
	ConVarRef mat_monitorgamma_tv_exp( "mat_monitorgamma_tv_exp" );
	ConVarRef mat_monitorgamma_tv_enabled( "mat_monitorgamma_tv_enabled" );
	g_pShaderDeviceDx8->SetHardwareGammaRamp( mat_monitorgamma.GetFloat(), mat_monitorgamma_tv_range_min.GetFloat(), mat_monitorgamma_tv_range_max.GetFloat(),
		mat_monitorgamma_tv_exp.GetFloat(), mat_monitorgamma_tv_enabled.GetBool() );

	// Structure to hold vertex data.
	struct COLORVERTEX
	{
		FLOAT       Position[3];
		float       TexCoord[2];
	};
	COLORVERTEX Vertices[4];

	Vertices[0].Position[0] = 0;
	Vertices[0].Position[1] = 0;
	Vertices[0].Position[2] = 0;
	Vertices[0].TexCoord[0] = 0;
	Vertices[0].TexCoord[1] = 0;

	Vertices[1].Position[0] = w-1;
	Vertices[1].Position[1] = 0;
	Vertices[1].Position[2] = 0;
	Vertices[1].TexCoord[0] = 1;
	Vertices[1].TexCoord[1] = 0;

	Vertices[2].Position[0] = w-1;
	Vertices[2].Position[1] = h-1;
	Vertices[2].Position[2] = 0;
	Vertices[2].TexCoord[0] = 1;
	Vertices[2].TexCoord[1] = 1;

	Vertices[3].Position[0] = 0;
	Vertices[3].Position[1] = h-1;
	Vertices[3].Position[2] = 0;
	Vertices[3].TexCoord[0] = 0;
	Vertices[3].TexCoord[1] = 1;

	Dx9Device()->SetTexture( 0, pTexture );
	Dx9Device()->SetVertexShader( pVertexShader );
	Dx9Device()->SetPixelShader( pPixelShader );
	Dx9Device()->SetVertexShaderConstantF( 0, (FLOAT*)&matWVP, 4 );
	Dx9Device()->SetVertexDeclaration( pVertexDecl );
	Dx9Device()->DrawPrimitiveUP( D3DPT_QUADLIST, 1, Vertices, sizeof( COLORVERTEX ) );

	Dx9Device()->SetVertexShader( NULL );
	Dx9Device()->SetPixelShader( NULL );
	Dx9Device()->SetTexture( 0, NULL );
	Dx9Device()->SetVertexDeclaration( NULL );

	pVertexShader->Release();
	pPixelShader->Release();
	pVertexDecl->Release();
	pTexture->Release();

	OwnGPUResources( true );

	return true;
#else
	return false;
#endif
}

//-----------------------------------------------------------------------------
// Initialize, shutdown the Device....
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::OnDeviceInit() 
{
	AcquireInternalRenderTargets();
	
	g_pHardwareConfig->CapsForEdit().m_TextureMemorySize = g_pShaderDeviceMgrDx8->GetVidMemBytes( m_nAdapter );

	CreateMatrixStacks();

	// Hide the cursor
	RECORD_COMMAND( DX8_SHOW_CURSOR, 1 );
	RECORD_INT( false );

#if !defined( _X360 )
	Dx9Device()->ShowCursor( false );
#endif

	// Initialize the shader manager
	ShaderManager()->Init();

	// Initialize the shader shadow
	ShaderShadow()->Init();

	// Initialize the mesh manager
	MeshMgr()->Init();

	bool bToolsMode = IsWindows() && ( CommandLine()->CheckParm( "-tools" ) != NULL );

	// Use fat vertices when running in tools
	MeshMgr()->UseFatVertices( bToolsMode );

	// Initialize the transition table.
	m_TransitionTable.Init();

	// Initialize the render state
	InitRenderState();

	// Clear the z and color buffers
	ClearBuffers( true, true, true, -1, -1 );

	AllocFrameSyncObjects();
	AllocNonInteractiveRefreshObjects();

	RECORD_COMMAND( DX8_BEGIN_SCENE, 0 );

	// Apply mandatory initialization HW fixups, GPU state will be left as expected
	if ( IsX360() )
	{
		// place the possible persisted display into the back buffer, ready for present()
		RestorePersistedDisplay( false );

		// 360 MUST perform an initial swap to stabilize the state
		// this ensures any states (e.g. gamma) are respected
		// without this, the 360 resets to internal default state on the first swap
		OwnGPUResources( false );
		Dx9Device()->Present( 0, 0, 0, 0 );

		// present corrupts the GPU state and back buffer (according to docs)
		// re-clear the back buffer in order to re-establish the expected contents
		ResetRenderState( false );
		ClearBuffers( true, true, true, -1, -1 );

		// place the front buffer image in the back buffer, later systems will detect and grab
		// other systems will detect and grab
		RestorePersistedDisplay( true );
	}

	Dx9Device()->BeginScene();

	return true;
}

void CShaderAPIDx8::OnDeviceShutdown() 
{
	if ( !IsPC() || !IsActive() )
		return;

	// Deallocate all textures
	DeleteAllTextures();

	// Release render targets
	ReleaseInternalRenderTargets();

	// Free objects that are used for frame syncing.
	FreeFrameSyncObjects();
	FreeNonInteractiveRefreshObjects();

	for (int i = 0; i < NUM_MATRIX_MODES; ++i)
	{
		if (m_pMatrixStack[i])
		{
			int ref = m_pMatrixStack[i]->Release();
			Assert( ref == 0 );
		}
	}

	// Shutdown the transition table.
	m_TransitionTable.Shutdown();

	MeshMgr()->Shutdown();

	ShaderManager()->Shutdown();

	ReleaseAllVertexDecl( );
}


//-----------------------------------------------------------------------------
// Sets the mode...
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::SetMode( void* VD3DHWND, int nAdapter, const ShaderDeviceInfo_t &info )
{
	//
	// FIXME: Note that this entire function is backward compat and will go soon
	//

	bool bRestoreNeeded = false;

	if ( IsActive() )
	{
		ReleaseResources();
		OnDeviceShutdown();
		ShutdownDevice();
		bRestoreNeeded = true;
	}

	LOCK_SHADERAPI();
	Assert( D3D() );
	Assert( nAdapter < g_pShaderDeviceMgr->GetAdapterCount() );

	const HardwareCaps_t& actualCaps = g_pShaderDeviceMgr->GetHardwareCaps( nAdapter );

	ShaderDeviceInfo_t actualInfo = info;
	int nDXLevel = actualInfo.m_nDXLevel ? actualInfo.m_nDXLevel : actualCaps.m_nDXSupportLevel;

	static bool bSetModeOnce = false;
	if ( !bSetModeOnce )
	{
		nDXLevel = MAX( ABSOLUTE_MINIMUM_DXLEVEL, CommandLine()->ParmValue( "-dxlevel", nDXLevel ) );
		bSetModeOnce = true;
	}
	if ( nDXLevel > actualCaps.m_nMaxDXSupportLevel )
	{
		nDXLevel = actualCaps.m_nMaxDXSupportLevel;
	}
	actualInfo.m_nDXLevel = g_pShaderDeviceMgr->GetClosestActualDXLevel( nDXLevel );

	if ( !g_pShaderDeviceMgrDx8->ValidateMode( nAdapter, actualInfo ) )
		return false;

	g_pShaderAPI = this;
	g_pShaderDevice = this;
	g_pShaderShadow = ShaderShadow();
	bool bOk = InitDevice( VD3DHWND, nAdapter, actualInfo );
	if ( !bOk )
		return false;

	if ( !OnDeviceInit() )
		return false;

	if ( bRestoreNeeded && IsPC() )
	{
		ReacquireResources();
	}

	return true;
}


//-----------------------------------------------------------------------------
// Creates the matrix stack
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CreateMatrixStacks()
{
	MEM_ALLOC_D3D_CREDIT();

	for (int i = 0; i < NUM_MATRIX_MODES; ++i)
	{
		HRESULT hr = D3DXCreateMatrixStack( 0, &m_pMatrixStack[i] );
		Assert( hr == D3D_OK );
	}
}


//-----------------------------------------------------------------------------
// Vendor-dependent code to turn on alpha to coverage
//-----------------------------------------------------------------------------
void CShaderAPIDx8::EnableAlphaToCoverage( void )
{
	if( !g_pHardwareConfig->ActualCaps().m_bSupportsAlphaToCoverage || !IsAAEnabled() )
		return;

	D3DRENDERSTATETYPE renderState = (D3DRENDERSTATETYPE)g_pHardwareConfig->Caps().m_AlphaToCoverageState;
	SetRenderState( renderState, g_pHardwareConfig->Caps().m_AlphaToCoverageEnableValue );	// Vendor dependent state
}

//-----------------------------------------------------------------------------
// Vendor-dependent code to turn off alpha to coverage
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DisableAlphaToCoverage()
{
	if( !g_pHardwareConfig->ActualCaps().m_bSupportsAlphaToCoverage || !IsAAEnabled() )
		return;

	D3DRENDERSTATETYPE renderState = (D3DRENDERSTATETYPE)g_pHardwareConfig->Caps().m_AlphaToCoverageState;
	SetRenderState( renderState, g_pHardwareConfig->Caps().m_AlphaToCoverageDisableValue );	// Vendor dependent state
}

//-----------------------------------------------------------------------------
// Determine capabilities
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::DetermineHardwareCaps( )
{
	HardwareCaps_t& actualCaps = g_pHardwareConfig->ActualCapsForEdit();
	if ( !g_pShaderDeviceMgrDx8->ComputeCapsFromD3D( &actualCaps, m_DisplayAdapter ) )
		return false;

	// See if the file tells us otherwise
	g_pShaderDeviceMgrDx8->ReadDXSupportLevels( actualCaps );

	// Read dxsupport.cfg which has config overrides for particular cards.
	g_pShaderDeviceMgrDx8->ReadHardwareCaps( actualCaps, actualCaps.m_nMaxDXSupportLevel );

	// What's in "-shader" overrides dxsupport.cfg
	const char *pShaderParam = CommandLine()->ParmValue( "-shader" );
	if ( pShaderParam )
	{
		Q_strncpy( actualCaps.m_pShaderDLL, pShaderParam, sizeof( actualCaps.m_pShaderDLL ) );
	}

	return true;
}


//-----------------------------------------------------------------------------
// Override caps based on a particular dx level
//-----------------------------------------------------------------------------
void CShaderAPIDx8::OverrideCaps( int nForcedDXLevel )
{
	// Just use the actual caps if we can't use what was requested or if the default is requested
	if ( nForcedDXLevel <= 0 ) 
	{
		nForcedDXLevel = g_pHardwareConfig->ActualCaps().m_nDXSupportLevel;
	}
	nForcedDXLevel = g_pShaderDeviceMgr->GetClosestActualDXLevel( nForcedDXLevel );

	g_pHardwareConfig->SetupHardwareCaps( nForcedDXLevel, g_pHardwareConfig->ActualCaps() );
}


//-----------------------------------------------------------------------------
// Called when the dx support level has changed
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DXSupportLevelChanged()
{
	LOCK_SHADERAPI();
	if ( IsPC() )
	{
		OverrideCaps( ShaderUtil()->GetConfig().dxSupportLevel );
	}
	else
	{
		Assert( 0 );
	}
}


//-----------------------------------------------------------------------------
// FIXME: Remove! Backward compat only
//-----------------------------------------------------------------------------
int CShaderAPIDx8::GetActualTextureStageCount() const
{
	return g_pHardwareConfig->GetActualTextureStageCount();
}

int CShaderAPIDx8::GetActualSamplerCount() const
{
	return g_pHardwareConfig->GetActualSamplerCount();
}

int CShaderAPIDx8::StencilBufferBits() const
{
	return m_bUsingStencil ? m_iStencilBufferBits : 0;
}

bool CShaderAPIDx8::IsAAEnabled() const
{
	bool bAntialiasing = ( m_PresentParameters.MultiSampleType != D3DMULTISAMPLE_NONE );
	return bAntialiasing;
}


//-----------------------------------------------------------------------------
// Methods related to queuing functions to be called per-(pMesh->Draw call) or per-pass
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsCommitFuncInUse( CommitFuncType_t func, CommitShaderType_t shader, int nFunc ) const
{
	Assert( nFunc < COMMIT_FUNC_COUNT );
	return ( m_pCommitFlags[func][shader][ nFunc >> 3 ] & ( 1 << ( nFunc & 0x7 ) ) ) != 0;
}

void CShaderAPIDx8::MarkCommitFuncInUse( CommitFuncType_t func, CommitShaderType_t shader, int nFunc )
{
	m_pCommitFlags[func][shader][ nFunc >> 3 ] |= 1 << ( nFunc & 0x7 );
}

void CShaderAPIDx8::AddCommitFunc( CommitFuncType_t func, CommitShaderType_t shader, StateCommitFunc_t f )
{
	m_CommitFuncs[func][shader].AddToTail( f );
}


//-----------------------------------------------------------------------------
// Clears all commit functions
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ClearAllCommitFuncs( CommitFuncType_t func, CommitShaderType_t shader )
{
	memset( m_pCommitFlags[func][shader], 0, COMMIT_FUNC_BYTE_COUNT );
	m_CommitFuncs[func][shader].RemoveAll();
}


//-----------------------------------------------------------------------------
// Calls all commit functions in a particular list
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CallCommitFuncs( CommitFuncType_t func, CommitShaderType_t shader, bool bForce )
{
	// 360 does not have have a FF pipe
	Assert ( IsPC() || ( IsX360() && shader != COMMIT_FIXED_FUNCTION ) );

	// Don't bother committing anything if we're deactivated
	if ( IsDeactivated() )
		return;

	CUtlVector< StateCommitFunc_t > &funcList =	m_CommitFuncs[func][shader];
	int nCount = funcList.Count();
	if ( nCount == 0 )
		return;

	for ( int i = 0; i < nCount; ++i )
	{
		funcList[i]( Dx9Device(), m_DesiredState, m_DynamicState, bForce );
	}
	ClearAllCommitFuncs( func, shader );
}


//-----------------------------------------------------------------------------
// Calls all per-mesh draw commit functions
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CallCommitFuncs( CommitFuncType_t func, bool bUsingFixedFunction, bool bForce )
{
	// Fixed-Function only funcs
	if ( IsPC() && ( bUsingFixedFunction || bForce ) )
	{
		CallCommitFuncs( func, COMMIT_FIXED_FUNCTION, bForce );
	}

	// Vertex-shader only funcs
	if ( !bUsingFixedFunction || bForce )
	{
		CallCommitFuncs( func, COMMIT_VERTEX_SHADER, bForce );
	}

	// State set in both FF + VS
	CallCommitFuncs( func, COMMIT_ALWAYS, bForce );
}

//-----------------------------------------------------------------------------
// Sets the sampler state
//-----------------------------------------------------------------------------
static FORCEINLINE void SetSamplerState( IDirect3DDevice9 *pDevice, int stage, D3DSAMPLERSTATETYPE state, DWORD val )
{
	RECORD_SAMPLER_STATE( stage, state, val ); 

#if defined( _X360 )
	if ( state == D3DSAMP_NOTSUPPORTED )
		return;
#endif

	pDevice->SetSamplerState( stage, state, val );
}

inline void CShaderAPIDx8::SetSamplerState( int stage, D3DSAMPLERSTATETYPE state, DWORD val )
{
#ifndef DX_TO_GL_ABSTRACTION
	if ( IsDeactivated() )
		return;
#endif

	::SetSamplerState( Dx9Device(), stage, state, val );
}

//-----------------------------------------------------------------------------
// Sets the texture stage state
//-----------------------------------------------------------------------------
inline void CShaderAPIDx8::SetTextureStageState( int stage, D3DTEXTURESTAGESTATETYPE state, DWORD val )
{
#if !defined( _X360 )
	if ( IsDeactivated() )
		return;

	Dx9Device()->SetTextureStageState( stage, state, val );
#endif
}

inline void CShaderAPIDx8::SetRenderState( D3DRENDERSTATETYPE state, DWORD val, bool bFlushIfChanged )
{
#if ( !defined( _X360 ) && !defined( DX_TO_GL_ABSTRACTION ) )
	{
		if ( IsDeactivated() )
			return;
	}
#else
	{
		Assert( state != D3DRS_NOTSUPPORTED ); //Use SetSupportedRenderState() macro to avoid this at compile time
		//if ( state == D3DRS_NOTSUPPORTED )
		//	return;
	}
#endif

	Assert( state >= 0 && ( int )state < MAX_NUM_RENDERSTATES );
	if ( m_DynamicState.m_RenderState[state] != val )
	{
		if ( bFlushIfChanged )
		{
			FlushBufferedPrimitives();
		}
#ifdef DX_TO_GL_ABSTRACTION
		Dx9Device()->SetRenderStateInline( state, val );
#else
		Dx9Device()->SetRenderState( state, val );
#endif
		m_DynamicState.m_RenderState[state] = val;
	}
}

#ifdef DX_TO_GL_ABSTRACTION
	// Purposely always writing the new state (even if it's not changed), in case SetRenderStateConstInline() compiles away to nothing (it sometimes does)
	#define SetRenderStateConstMacro(t, state, val )						\
		do																	\
		{																	\
			Assert( state >= 0 && ( int )state < MAX_NUM_RENDERSTATES );	\
			if ( t->m_DynamicState.m_RenderState[state] != (DWORD)val )		\
			{																\
				Dx9Device()->SetRenderStateConstInline( state, val );		\
			}																\
			t->m_DynamicState.m_RenderState[state] = val;					\
		} while(0)
#else
	#define SetRenderStateConstMacro(t, state, val ) t->SetRenderState( state, val );
#endif

//-----------------------------------------------------------------------------
// Commits viewports
//-----------------------------------------------------------------------------
static void CommitSetScissorRect( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce )
{
	// Set the enable/disable renderstate

	bool bEnableChanged = desiredState.m_RenderState[D3DRS_SCISSORTESTENABLE] != currentState.m_RenderState[D3DRS_SCISSORTESTENABLE];
	if ( bEnableChanged )
	{
		Dx9Device()->SetRenderState( D3DRS_SCISSORTESTENABLE, desiredState.m_RenderState[D3DRS_SCISSORTESTENABLE] );
		currentState.m_RenderState[D3DRS_SCISSORTESTENABLE] = desiredState.m_RenderState[D3DRS_SCISSORTESTENABLE];
	}

	// Only bother with the rect if we're enabling
	if ( desiredState.m_RenderState[D3DRS_SCISSORTESTENABLE] )
	{
		int nWidth, nHeight;
		ITexture *pTexture = ShaderAPI()->GetRenderTargetEx( 0 );
		if ( pTexture == NULL )
		{
			ShaderAPI()->GetBackBufferDimensions( nWidth, nHeight );
		}
		else
		{
			nWidth  = pTexture->GetActualWidth();
			nHeight = pTexture->GetActualHeight();
		}

		Assert( (desiredState.m_ScissorRect.left <= nWidth) && (desiredState.m_ScissorRect.bottom <= nHeight) &&
			    ( desiredState.m_ScissorRect.top >= 0 ) && (desiredState.m_ScissorRect.left >= 0) );

		clamp( desiredState.m_ScissorRect.right,  0, nWidth );
		clamp( desiredState.m_ScissorRect.left,   0, nWidth );
		clamp( desiredState.m_ScissorRect.top,    0, nHeight );
		clamp( desiredState.m_ScissorRect.bottom, 0, nHeight );

		Dx9Device()->SetScissorRect( &desiredState.m_ScissorRect );
		currentState.m_ScissorRect = desiredState.m_ScissorRect;
	}
}

// Routine for setting scissor rect
// If pScissorRect is NULL, disable scissoring by setting the render state
// If pScissorRect is non-NULL, set the RECT state in Direct3D AND set the renderstate
inline void CShaderAPIDx8::SetScissorRect( const int nLeft, const int nTop, const int nRight, const int nBottom, const bool bEnableScissor )
{
	Assert( (nLeft <= nRight) && (nTop <= nBottom) ); //360 craps itself if this isn't true
	if ( !g_pHardwareConfig->Caps().m_bScissorSupported )
		return;

	// If we're turning it on, check the validity of the rect
	if ( bEnableScissor )
	{
		int nWidth, nHeight;
		ITexture *pTexture = GetRenderTargetEx( 0 );
		if ( pTexture == NULL )
		{
			GetBackBufferDimensions( nWidth, nHeight );
		}
		else
		{
			nWidth  = pTexture->GetActualWidth();
			nHeight = pTexture->GetActualHeight();
		}

		Assert( (nRight <= nWidth) && (nBottom <= nHeight) && ( nTop >= 0 ) && (nLeft >= 0) );

		clamp( nRight,  0, nWidth );
		clamp( nLeft,   0, nWidth );
		clamp( nTop,    0, nHeight );
		clamp( nBottom, 0, nHeight );
	}

	DWORD dwEnableScissor = bEnableScissor ? TRUE : FALSE;
	RECT newScissorRect;
	newScissorRect.left = nLeft;
	newScissorRect.top = nTop;
	newScissorRect.right = nRight;
	newScissorRect.bottom = nBottom;

	if ( !m_bResettingRenderState )
	{
		bool bEnableChanged = m_DesiredState.m_RenderState[D3DRS_SCISSORTESTENABLE] != dwEnableScissor;
		bool bRectChanged = memcmp( &newScissorRect, &m_DesiredState.m_ScissorRect, sizeof(RECT) ) != 0;

		if ( !bEnableChanged && !bRectChanged )
			return;
	}

	if ( !IsDeactivated() )
	{
		// Do we need to do this always?
		FlushBufferedPrimitives();
	}

	m_DesiredState.m_RenderState[D3DRS_SCISSORTESTENABLE] = dwEnableScissor;
	memcpy( &m_DesiredState.m_ScissorRect, &newScissorRect, sizeof(RECT) );

	ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_ALWAYS, CommitSetScissorRect );
}

inline void CShaderAPIDx8::SetRenderStateForce( D3DRENDERSTATETYPE state, DWORD val )
{
#if ( !defined( _X360 ) && !defined( DX_TO_GL_ABSTRACTION ) )
	{
		if ( IsDeactivated() )
			return;
	}
#else
	{
		Assert( state != D3DRS_NOTSUPPORTED ); //Use SetSupportedRenderStateForce() macro to avoid this at compile time
		//if ( state == D3DRS_NOTSUPPORTED )
		//	return;
	}
#endif

	Dx9Device()->SetRenderState( state, val );
	m_DynamicState.m_RenderState[state] = val;
}


//-----------------------------------------------------------------------------
// Set the values for pixel shader constants that don't change.
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetStandardVertexShaderConstants( float fOverbright )
{
	LOCK_SHADERAPI();
	if ( g_pHardwareConfig->GetDXSupportLevel() < 80 )
		return;

	// Set a couple standard constants....
	Vector4D standardVertexShaderConstant( 0.0f, 1.0f, 2.0f, 0.5f );
	SetVertexShaderConstant( VERTEX_SHADER_MATH_CONSTANTS0, standardVertexShaderConstant.Base(), 1 );

	// [ gamma, overbright, 1/3, 1/overbright ]
	standardVertexShaderConstant.Init( 1.0f/2.2f, fOverbright, 1.0f / 3.0f, 1.0f / fOverbright );
	SetVertexShaderConstant( VERTEX_SHADER_MATH_CONSTANTS1, standardVertexShaderConstant.Base(), 1 );

	int nModelIndex = g_pHardwareConfig->Caps().m_nDXSupportLevel < 90 ? VERTEX_SHADER_MODEL - 10 : VERTEX_SHADER_MODEL;

/*
	if ( g_pHardwareConfig->Caps().m_SupportsVertexShaders_3_0 )
	{
		Vector4D factors[4];
		factors[0].Init( 1, 0, 0, 0 );
		factors[1].Init( 0, 1, 0, 0 );
		factors[2].Init( 0, 0, 1, 0 );
		factors[3].Init( 0, 0, 0, 1 );
		SetVertexShaderConstant( VERTEX_SHADER_DOT_PRODUCT_FACTORS, factors[0].Base(), 4 );
	}
*/

	if ( g_pHardwareConfig->Caps().m_SupportsVertexShaders_2_0 )
	{
		float c[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
		SetVertexShaderConstant( VERTEX_SHADER_FLEXSCALE, c, 1 );
	}
	else
	{
		// These point to the lighting and the transforms
		standardVertexShaderConstant.Init( 
			VERTEX_SHADER_LIGHTS,
			VERTEX_SHADER_LIGHTS + 5, 
			// Use COLOR instead of UBYTE4 since Geforce3 does not support it
			// vConst.w should be 3, but due to about hack, mul by 255 and add epsilon
			// 360 supports UBYTE4, so no fixup required
			(IsPC() || !IsX360()) ? 765.01f : 3.0f,
			nModelIndex );	// DX8 has different constant packing

		SetVertexShaderConstant( VERTEX_SHADER_LIGHT_INDEX, standardVertexShaderConstant.Base(), 1 );
	}
}

//-----------------------------------------------------------------------------
// Initialize vertex and pixel shaders
//-----------------------------------------------------------------------------
void CShaderAPIDx8::InitVertexAndPixelShaders()
{
	// Allocate space for the pixel and vertex shader constants...
	if ( g_pHardwareConfig->Caps().m_SupportsPixelShaders )
	{
		// pixel shaders
		{
			if (m_DynamicState.m_pVectorPixelShaderConstant)
			{
				delete[] m_DynamicState.m_pVectorPixelShaderConstant;
			}
			m_DynamicState.m_pVectorPixelShaderConstant = new Vector4D[g_pHardwareConfig->Caps().m_NumPixelShaderConstants];

			if (m_DesiredState.m_pVectorPixelShaderConstant)
			{
				delete[] m_DesiredState.m_pVectorPixelShaderConstant;
			}
			m_DesiredState.m_pVectorPixelShaderConstant = new Vector4D[g_pHardwareConfig->Caps().m_NumPixelShaderConstants];

			if (m_DynamicState.m_pBooleanPixelShaderConstant)
			{
				delete[] m_DynamicState.m_pBooleanPixelShaderConstant;
			}
			m_DynamicState.m_pBooleanPixelShaderConstant = new BOOL[g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants];

			if (m_DesiredState.m_pBooleanPixelShaderConstant)
			{
				delete[] m_DesiredState.m_pBooleanPixelShaderConstant;
			}
			m_DesiredState.m_pBooleanPixelShaderConstant = new BOOL[g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants];
		
			if (m_DynamicState.m_pIntegerPixelShaderConstant)
			{
				delete[] m_DynamicState.m_pIntegerPixelShaderConstant;
			}
			m_DynamicState.m_pIntegerPixelShaderConstant = new IntVector4D[g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants];

			if (m_DesiredState.m_pIntegerPixelShaderConstant)
			{
				delete[] m_DesiredState.m_pIntegerPixelShaderConstant;
			}
			m_DesiredState.m_pIntegerPixelShaderConstant = new IntVector4D[g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants];

			// force reset vector pixel constants
			int i;
			for ( i = 0; i < g_pHardwareConfig->Caps().m_NumPixelShaderConstants; ++i )
			{
				m_DesiredState.m_pVectorPixelShaderConstant[i].Init();
			}
			SetPixelShaderConstant( 0, m_DesiredState.m_pVectorPixelShaderConstant[0].Base(), g_pHardwareConfig->Caps().m_NumPixelShaderConstants, true );

			// force reset boolean pixel constants
			int nNumBooleanPixelShaderConstants = g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants;
			if ( nNumBooleanPixelShaderConstants )
			{
				for ( i = 0; i < nNumBooleanPixelShaderConstants; ++i )
				{
					m_DesiredState.m_pBooleanPixelShaderConstant[i] = 0;
				}
				SetBooleanPixelShaderConstant( 0, m_DesiredState.m_pBooleanPixelShaderConstant, nNumBooleanPixelShaderConstants, true );
			}

			// force reset integer pixel constants
			int nNumIntegerPixelShaderConstants = g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants;
			if ( nNumIntegerPixelShaderConstants )
			{
				for ( i = 0; i < nNumIntegerPixelShaderConstants; ++i )
				{
					m_DesiredState.m_pIntegerPixelShaderConstant[i].Init();
				}
				SetIntegerPixelShaderConstant( 0, m_DesiredState.m_pIntegerPixelShaderConstant[0].Base(), nNumIntegerPixelShaderConstants, true );
			}
		}
		
		// vertex shaders
		{
			if (m_DynamicState.m_pVectorVertexShaderConstant)
			{
				delete[] m_DynamicState.m_pVectorVertexShaderConstant;
			}
			m_DynamicState.m_pVectorVertexShaderConstant = new Vector4D[g_pHardwareConfig->Caps().m_NumVertexShaderConstants];

			if (m_DesiredState.m_pVectorVertexShaderConstant)
			{
				delete[] m_DesiredState.m_pVectorVertexShaderConstant;
			}
			m_DesiredState.m_pVectorVertexShaderConstant = new Vector4D[g_pHardwareConfig->Caps().m_NumVertexShaderConstants];

			if (m_DynamicState.m_pBooleanVertexShaderConstant)
			{
				delete[] m_DynamicState.m_pBooleanVertexShaderConstant;
			}
			m_DynamicState.m_pBooleanVertexShaderConstant = new BOOL[g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants];

			if (m_DesiredState.m_pBooleanVertexShaderConstant)
			{
				delete[] m_DesiredState.m_pBooleanVertexShaderConstant;
			}
			m_DesiredState.m_pBooleanVertexShaderConstant = new BOOL[g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants];

			if (m_DynamicState.m_pIntegerVertexShaderConstant)
			{
				delete[] m_DynamicState.m_pIntegerVertexShaderConstant;
			}
			m_DynamicState.m_pIntegerVertexShaderConstant = new IntVector4D[g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants];

			if (m_DesiredState.m_pIntegerVertexShaderConstant)
			{
				delete[] m_DesiredState.m_pIntegerVertexShaderConstant;
			}
			m_DesiredState.m_pIntegerVertexShaderConstant = new IntVector4D[g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants];

			// force reset vector vertex constants
			int i;
			for ( i = 0; i < g_pHardwareConfig->Caps().m_NumVertexShaderConstants; ++i )
			{
				m_DesiredState.m_pVectorVertexShaderConstant[i].Init();
			}
			SetVertexShaderConstant( 0, m_DesiredState.m_pVectorVertexShaderConstant[0].Base(), g_pHardwareConfig->Caps().m_NumVertexShaderConstants, true );

			// force reset boolean vertex constants
			for ( i = 0; i < g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants; ++i )
			{
				m_DesiredState.m_pBooleanVertexShaderConstant[i] = 0;
			}
			SetBooleanVertexShaderConstant( 0, m_DesiredState.m_pBooleanVertexShaderConstant, g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants, true );

			// force reset integer vertex constants
			for ( i = 0; i < g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants; ++i )
			{
				m_DesiredState.m_pIntegerVertexShaderConstant[i].Init();
			}
			SetIntegerVertexShaderConstant( 0, m_DesiredState.m_pIntegerVertexShaderConstant[0].Base(), g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants, true );
		}

		if ( IsX360() )
		{
			// to init/update all constants, must disable ownership
			bool bPreviousState = OwnGPUResources( false );
			WriteShaderConstantsToGPU();
			OwnGPUResources( bPreviousState );
		}
		SetStandardVertexShaderConstants( OVERBRIGHT );
	}

	// Set up the vertex and pixel shader stuff
	ShaderManager()->ResetShaderState();
}


//-----------------------------------------------------------------------------
// Initialize render state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::InitRenderState()
{ 
	// Set the default shadow state
	g_pShaderShadowDx8->SetDefaultState();

	// Grab a snapshot of this state; we'll be using it to set the board
	// state to something well defined.
	m_TransitionTable.TakeDefaultStateSnapshot();

	if ( !IsDeactivated() )
	{
		ResetRenderState();
	}
}


//-----------------------------------------------------------------------------
// Commits vertex textures
//-----------------------------------------------------------------------------
static void CommitVertexTextures( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce )
{
	int nCount = g_pMaterialSystemHardwareConfig->GetVertexTextureCount();
	for ( int i = 0; i < nCount; ++i )
	{
		VertexTextureState_t &currentVTState = currentState.m_VertexTextureState[i];
		ShaderAPITextureHandle_t textureHandle = desiredState.m_VertexTextureState[i].m_BoundTexture;

		Texture_t *pTexture = ( textureHandle != INVALID_SHADERAPI_TEXTURE_HANDLE ) ? &g_ShaderAPIDX8.GetTexture( textureHandle ) : NULL;
		if ( pTexture && ( pTexture->m_Flags & Texture_t::IS_VERTEX_TEXTURE ) == 0 )
		{
			Warning( "Attempting to bind a vertex texture (%s) which was not created as a vertex texture!\n", pTexture->m_DebugName.String() );
		}

		if ( bForce || ( currentVTState.m_BoundTexture != textureHandle ) )
		{
			currentVTState.m_BoundTexture = textureHandle;

// 			RECORD_COMMAND( DX8_SET_VERTEXTEXTURE, 3 );
// 			RECORD_INT( D3DVERTEXTEXTURESAMPLER0 + nStage );
// 			RECORD_INT( pTexture ? pTexture->GetUniqueID() : 0xFFFF );
// 			RECORD_INT( 0 );

			IDirect3DBaseTexture *pD3DTexture = ( textureHandle >= 0 ) ? g_ShaderAPIDX8.GetD3DTexture( textureHandle ) : NULL;

			pDevice->SetTexture( D3DVERTEXTEXTURESAMPLER0 + i, pD3DTexture );
		}

		if ( !pTexture )
			continue;

		D3DTEXTUREADDRESS nNewUTexWrap = pTexture->m_UTexWrap;
		if ( bForce || ( currentVTState.m_UTexWrap != nNewUTexWrap ) )
		{
			currentVTState.m_UTexWrap = nNewUTexWrap;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSU, currentVTState.m_UTexWrap );
		}

		D3DTEXTUREADDRESS nNewVTexWrap = pTexture->m_VTexWrap;
		if ( bForce || ( currentVTState.m_VTexWrap != nNewVTexWrap ) )
		{
			currentVTState.m_VTexWrap = nNewVTexWrap;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSV, currentVTState.m_VTexWrap );
		}

		/*
		D3DTEXTUREADDRESS nNewWTexWrap = pTexture->GetWTexWrap();
		if ( bForce || ( currentVTState.m_WTexWrap != nNewWTexWrap ) )
		{
			currentVTState.m_WTexWrap = nNewWTexWrap;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSW, currentVTState.m_WTexWrap );
		}
		*/

		D3DTEXTUREFILTERTYPE nNewMinFilter = pTexture->m_MinFilter;
		if ( bForce || ( currentVTState.m_MinFilter != nNewMinFilter ) )
		{
			currentVTState.m_MinFilter = nNewMinFilter;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MINFILTER, currentVTState.m_MinFilter );
		}

		D3DTEXTUREFILTERTYPE nNewMagFilter = pTexture->m_MagFilter;
		if ( bForce || ( currentVTState.m_MagFilter != nNewMagFilter ) )
		{
			currentVTState.m_MagFilter = nNewMagFilter;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MAGFILTER, currentVTState.m_MagFilter );
		}

		D3DTEXTUREFILTERTYPE nNewMipFilter = pTexture->m_MipFilter;
		if ( bForce || ( currentVTState.m_MipFilter != nNewMipFilter ) )
		{
			currentVTState.m_MipFilter = nNewMipFilter;
			SetSamplerState( pDevice, D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MIPFILTER, currentVTState.m_MipFilter );
		}
	}
}


//-----------------------------------------------------------------------------
// Returns true if the state snapshot is translucent
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsTranslucent( StateSnapshot_t id ) const
{
	LOCK_SHADERAPI();
	return m_TransitionTable.GetSnapshot(id).m_AlphaBlendEnable;
}


//-----------------------------------------------------------------------------
// Returns true if the state snapshot is alpha tested
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsAlphaTested( StateSnapshot_t id ) const
{
	LOCK_SHADERAPI();
	return m_TransitionTable.GetSnapshot(id).m_AlphaTestEnable;
}


//-----------------------------------------------------------------------------
// Gets at the shadow state for a particular state snapshot
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsDepthWriteEnabled( StateSnapshot_t id ) const
{
	LOCK_SHADERAPI();
	return m_TransitionTable.GetSnapshot(id).m_ZWriteEnable;
}



//-----------------------------------------------------------------------------
// Returns true if the state snapshot uses shaders
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::UsesVertexAndPixelShaders( StateSnapshot_t id ) const
{
	LOCK_SHADERAPI();
	return m_TransitionTable.GetSnapshotShader(id).m_VertexShader != INVALID_SHADER;
}


//-----------------------------------------------------------------------------
// Takes a snapshot
//-----------------------------------------------------------------------------
StateSnapshot_t CShaderAPIDx8::TakeSnapshot( )
{
	LOCK_SHADERAPI();

	return m_TransitionTable.TakeSnapshot();
}

void CShaderAPIDx8::ResetDXRenderState( void )
{
	float zero = 0.0f;
	float one = 1.0f;
	DWORD dZero = *((DWORD*)(&zero));
	DWORD dOne = *((DWORD*)(&one));
	
	// Set default values for all dx render states.
	// NOTE: this does not include states encapsulated by the transition table,
	//       which are reset in CTransitionTable::UseDefaultState
    SetSupportedRenderStateForce( D3DRS_FILLMODE, D3DFILL_SOLID );
    SetSupportedRenderStateForce( D3DRS_SHADEMODE, D3DSHADE_GOURAUD );
    SetSupportedRenderStateForce( D3DRS_LASTPIXEL, TRUE );
    SetSupportedRenderStateForce( D3DRS_CULLMODE, D3DCULL_CCW );
    SetSupportedRenderStateForce( D3DRS_DITHERENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_FOGENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_SPECULARENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_FOGCOLOR, 0 );
    SetSupportedRenderStateForce( D3DRS_FOGTABLEMODE, D3DFOG_NONE );
    SetSupportedRenderStateForce( D3DRS_FOGSTART, dZero );
    SetSupportedRenderStateForce( D3DRS_FOGEND, dOne );
    SetSupportedRenderStateForce( D3DRS_FOGDENSITY, dZero );
    SetSupportedRenderStateForce( D3DRS_RANGEFOGENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_STENCILENABLE, FALSE);
    SetSupportedRenderStateForce( D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_STENCILZFAIL, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_STENCILPASS, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_STENCILFUNC, D3DCMP_ALWAYS );
    SetSupportedRenderStateForce( D3DRS_STENCILREF, 0 );
    SetSupportedRenderStateForce( D3DRS_STENCILMASK, 0xFFFFFFFF );
    SetSupportedRenderStateForce( D3DRS_STENCILWRITEMASK, 0xFFFFFFFF );
    SetSupportedRenderStateForce( D3DRS_TEXTUREFACTOR, 0xFFFFFFFF );
    SetSupportedRenderStateForce( D3DRS_WRAP0, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP1, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP2, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP3, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP4, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP5, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP6, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP7, 0 );
    SetSupportedRenderStateForce( D3DRS_CLIPPING, TRUE );
    SetSupportedRenderStateForce( D3DRS_LIGHTING, TRUE );
    SetSupportedRenderStateForce( D3DRS_AMBIENT, 0 );
    SetSupportedRenderStateForce( D3DRS_FOGVERTEXMODE, D3DFOG_NONE);
    SetSupportedRenderStateForce( D3DRS_COLORVERTEX, TRUE );
    SetSupportedRenderStateForce( D3DRS_LOCALVIEWER, TRUE );
    SetSupportedRenderStateForce( D3DRS_NORMALIZENORMALS, FALSE );
    SetSupportedRenderStateForce( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_COLOR2 );
    SetSupportedRenderStateForce( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
    SetSupportedRenderStateForce( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
    SetSupportedRenderStateForce( D3DRS_VERTEXBLEND, D3DVBF_DISABLE );
    SetSupportedRenderStateForce( D3DRS_CLIPPLANEENABLE, 0 );

	// -disable_d3d9_hacks is for debugging. For example, the "CENT" driver hack thing causes the flashlight pass to appear much brighter on NVidia drivers.
	if ( IsPC() && !IsOpenGL() && !CommandLine()->CheckParm( "-disable_d3d9_hacks" ) )
	{	
		if ( g_pHardwareConfig->Caps().m_bNeedsATICentroidHack && ( g_pHardwareConfig->Caps().m_VendorID == VENDORID_ATI ) )
		{
			SetSupportedRenderStateForce( D3DRS_POINTSIZE, MAKEFOURCC( 'C', 'E', 'N', 'T' ) );
		}

		if( g_pHardwareConfig->Caps().m_bDisableShaderOptimizations )
		{
			SetSupportedRenderStateForce( D3DRS_ADAPTIVETESS_Y, MAKEFOURCC( 'C', 'O', 'P', 'M' ) );
		}
	}
	SetSupportedRenderStateForce( D3DRS_POINTSIZE, dOne );
    SetSupportedRenderStateForce( D3DRS_POINTSIZE_MIN, dOne );
    SetSupportedRenderStateForce( D3DRS_POINTSPRITEENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_POINTSCALEENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_POINTSCALE_A, dOne );
    SetSupportedRenderStateForce( D3DRS_POINTSCALE_B, dZero );
    SetSupportedRenderStateForce( D3DRS_POINTSCALE_C, dZero );
    SetSupportedRenderStateForce( D3DRS_MULTISAMPLEANTIALIAS, TRUE );
    SetSupportedRenderStateForce( D3DRS_MULTISAMPLEMASK, 0xFFFFFFFF );
    SetSupportedRenderStateForce( D3DRS_PATCHEDGESTYLE, D3DPATCHEDGE_DISCRETE );
    SetSupportedRenderStateForce( D3DRS_DEBUGMONITORTOKEN, D3DDMT_ENABLE );
	float sixtyFour = 64.0f;
    SetSupportedRenderStateForce( D3DRS_POINTSIZE_MAX, *((DWORD*)(&sixtyFour)));
    SetSupportedRenderStateForce( D3DRS_INDEXEDVERTEXBLENDENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_TWEENFACTOR, dZero );
    SetSupportedRenderStateForce( D3DRS_POSITIONDEGREE, D3DDEGREE_CUBIC );
    SetSupportedRenderStateForce( D3DRS_NORMALDEGREE, D3DDEGREE_LINEAR );
    SetSupportedRenderStateForce( D3DRS_SCISSORTESTENABLE, FALSE);
    SetSupportedRenderStateForce( D3DRS_SLOPESCALEDEPTHBIAS, dZero );
    SetSupportedRenderStateForce( D3DRS_ANTIALIASEDLINEENABLE, FALSE );
    SetSupportedRenderStateForce( D3DRS_MINTESSELLATIONLEVEL, dOne );
    SetSupportedRenderStateForce( D3DRS_MAXTESSELLATIONLEVEL, dOne );
    SetSupportedRenderStateForce( D3DRS_ADAPTIVETESS_X, dZero );
    SetSupportedRenderStateForce( D3DRS_ADAPTIVETESS_Y, dZero );
    SetSupportedRenderStateForce( D3DRS_ADAPTIVETESS_Z, dOne );
    SetSupportedRenderStateForce( D3DRS_ADAPTIVETESS_W, dZero );
    SetSupportedRenderStateForce( D3DRS_ENABLEADAPTIVETESSELLATION, FALSE );
    SetSupportedRenderStateForce( D3DRS_TWOSIDEDSTENCILMODE, FALSE );
    SetSupportedRenderStateForce( D3DRS_CCW_STENCILFAIL, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_CCW_STENCILZFAIL, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_CCW_STENCILPASS, D3DSTENCILOP_KEEP );
    SetSupportedRenderStateForce( D3DRS_CCW_STENCILFUNC, D3DCMP_ALWAYS );
    SetSupportedRenderStateForce( D3DRS_COLORWRITEENABLE1, 0x0000000f );
    SetSupportedRenderStateForce( D3DRS_COLORWRITEENABLE2, 0x0000000f );
    SetSupportedRenderStateForce( D3DRS_COLORWRITEENABLE3, 0x0000000f );
    SetSupportedRenderStateForce( D3DRS_BLENDFACTOR, 0xffffffff );
    SetSupportedRenderStateForce( D3DRS_SRGBWRITEENABLE, 0);
    SetSupportedRenderStateForce( D3DRS_DEPTHBIAS, dZero );
    SetSupportedRenderStateForce( D3DRS_WRAP8, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP9, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP10, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP11, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP12, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP13, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP14, 0 );
    SetSupportedRenderStateForce( D3DRS_WRAP15, 0 );
    SetSupportedRenderStateForce( D3DRS_BLENDOP, D3DBLENDOP_ADD );
    SetSupportedRenderStateForce( D3DRS_BLENDOPALPHA, D3DBLENDOP_ADD );

#if defined( _X360 )
	SetSupportedRenderStateForce( D3DRS_HIZENABLE, D3DHIZ_AUTOMATIC );
	SetSupportedRenderStateForce( D3DRS_HIZWRITEENABLE, D3DHIZ_AUTOMATIC );

	SetSupportedRenderStateForce( D3DRS_HISTENCILENABLE, FALSE );
	SetSupportedRenderStateForce( D3DRS_HISTENCILWRITEENABLE, FALSE );
	SetSupportedRenderStateForce( D3DRS_HISTENCILFUNC, D3DHSCMP_EQUAL );
	SetSupportedRenderStateForce( D3DRS_HISTENCILREF, 0 );
#endif
}

//-----------------------------------------------------------------------------
// Own GPU Resources.  Return previous state.
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::OwnGPUResources( bool bEnable )
{
#if defined( _X360 )
	if ( m_bGPUOwned == bEnable )
	{
		return m_bGPUOwned;
	}

	if ( !bEnable )
	{
		Dx9Device()->GpuDisownAll();
	}
	else
	{
		// owned GPU constants can be set very fast, and must be in blocks of 4
		// there are 256, but the game only uses 217 (snapped to 220), leaving just enough room for shader literals
		COMPILE_TIME_ASSERT( VERTEX_SHADER_MODEL + 3*NUM_MODEL_TRANSFORMS == 217 );
		Dx9Device()->GpuOwnVertexShaderConstantF( 0, AlignValue( VERTEX_SHADER_MODEL + 3*NUM_MODEL_TRANSFORMS, 4 ) );
		// there are 256, but the game only utilizes 32, leaving lots of room for shader literals
		Dx9Device()->GpuOwnPixelShaderConstantF( 0, 32 );
	}

	bool bPrevious = m_bGPUOwned;
	m_bGPUOwned = bEnable;
	
	return bPrevious;
#else
	return false;
#endif
}

//-----------------------------------------------------------------------------
// Reset render state (to its initial value)
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ResetRenderState( bool bFullReset )
{
	LOCK_SHADERAPI();
	RECORD_DEBUG_STRING( "BEGIN ResetRenderState" );

	if ( !CanDownloadTextures() )
	{
		QueueResetRenderState();
		return;
	}

	m_bResettingRenderState = true;

	OwnGPUResources( true );

	ResetDXRenderState();

	// We're not currently rendering anything
	m_nCurrentSnapshot = -1;

	D3DXMatrixIdentity( &m_CachedPolyOffsetProjectionMatrix );
	D3DXMatrixIdentity( &m_CachedFastClipProjectionMatrix );
	D3DXMatrixIdentity( &m_CachedFastClipPolyOffsetProjectionMatrix );
	m_UsingTextureRenderTarget = false;

	m_SceneFogColor[0] = 0;
	m_SceneFogColor[1] = 0;
	m_SceneFogColor[2] = 0;
	m_SceneFogMode = MATERIAL_FOG_NONE;

	// This is state that isn't part of the snapshot per-se, because we
	// don't need it when it's actually time to render. This just helps us
	// to construct the shadow state.
	m_DynamicState.m_ClearColor = D3DCOLOR_XRGB(0,0,0);

	if ( bFullReset )
	{
		InitVertexAndPixelShaders();
	}
	else
	{
		// just need to dirty the dynamic state, desired state gets copied into below
		Q_memset( m_DynamicState.m_pVectorPixelShaderConstant, 0, g_pHardwareConfig->Caps().m_NumPixelShaderConstants * sizeof( Vector4D ) );
		Q_memset( m_DynamicState.m_pBooleanPixelShaderConstant, 0, g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants * sizeof( BOOL ) );
		Q_memset( m_DynamicState.m_pIntegerPixelShaderConstant, 0, g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants * sizeof( IntVector4D ) );

		Q_memset( m_DynamicState.m_pVectorVertexShaderConstant, 0, g_pHardwareConfig->Caps().m_NumVertexShaderConstants * sizeof( Vector4D ) );
		Q_memset( m_DynamicState.m_pBooleanVertexShaderConstant, 0, g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants * sizeof( BOOL ) );
		Q_memset( m_DynamicState.m_pIntegerVertexShaderConstant, 0, g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants * sizeof( IntVector4D ) );

		SetStandardVertexShaderConstants( OVERBRIGHT );
	}

	//Set the default compressed depth range written to dest alpha. Only need to compress it for 8bit alpha to get a useful gradient.
	m_DynamicState.m_DestAlphaDepthRange = (g_pHardwareConfig->GetHDRType() == HDR_TYPE_FLOAT) ? 8192.0f : 192.0f;

	m_CachedAmbientLightCube = STATE_CHANGED;

	// Set the constant color
	m_DynamicState.m_ConstantColor = 0xFFFFFFFF;
	Color4ub( 255, 255, 255, 255 );

	// Ambient light color
	m_DynamicState.m_Ambient = 0;
	SetSupportedRenderState( D3DRS_AMBIENT, m_DynamicState.m_Ambient );

	// Fog
	m_VertexShaderFogParams[0] = m_VertexShaderFogParams[1] = 0.0f;
	m_WorldSpaceCameraPositon.Init( 0, 0, 0.01f, 0 ); // Don't let z be zero, as some pixel shaders will divide by this
	m_DynamicState.m_FogColor = 0xFFFFFFFF;
	m_DynamicState.m_PixelFogColor[0] = m_DynamicState.m_PixelFogColor[1] = 
		m_DynamicState.m_PixelFogColor[2] = m_DynamicState.m_PixelFogColor[3] = 0.0f;
	m_DynamicState.m_bFogGammaCorrectionDisabled = false;
	m_DynamicState.m_FogEnable = false;
	m_DynamicState.m_SceneFog = MATERIAL_FOG_NONE;
	m_DynamicState.m_FogMode = D3DFOG_NONE;
	m_DynamicState.m_FogStart = -1;
	m_DynamicState.m_FogEnd = -1;
	m_DynamicState.m_FogMaxDensity = -1.0f;
	m_DynamicState.m_FogZ = 0.0f;

	SetSupportedRenderState( D3DRS_FOGCOLOR, m_DynamicState.m_FogColor );
	SetSupportedRenderState( D3DRS_FOGENABLE, m_DynamicState.m_FogEnable );
	SetSupportedRenderState( D3DRS_FOGTABLEMODE, D3DFOG_NONE );
	SetSupportedRenderState( D3DRS_FOGVERTEXMODE, D3DFOG_NONE );
	SetSupportedRenderState( D3DRS_RANGEFOGENABLE, false );

	FogStart( 0 );
	FogEnd( 0 );
	FogMaxDensity( 1.0f );

	m_DynamicState.m_bSRGBWritesEnabled = false;

	// Set the cull mode
	m_DynamicState.m_bCullEnabled = true;
	m_DynamicState.m_CullMode = D3DCULL_CCW;
	m_DynamicState.m_DesiredCullMode = D3DCULL_CCW;
	SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW );

	// No shade mode yet
	const D3DSHADEMODE D3DSHADE_NONE = (D3DSHADEMODE)0;
	m_DynamicState.m_ShadeMode = D3DSHADE_NONE;
	ShadeMode( SHADER_SMOOTH );

	m_DynamicState.m_bHWMorphingEnabled = false;

	// Skinning...
	m_DynamicState.m_NumBones = 0;
	m_DynamicState.m_VertexBlend = (D3DVERTEXBLENDFLAGS)-1;
	SetSupportedRenderState( D3DRS_VERTEXBLEND, D3DVBF_DISABLE );
	SetSupportedRenderState( D3DRS_INDEXEDVERTEXBLENDENABLE, FALSE );

	// No normal normalization
	m_DynamicState.m_NormalizeNormals = false;
	SetSupportedRenderState( D3DRS_NORMALIZENORMALS, m_DynamicState.m_NormalizeNormals );

	bool bAntialiasing = ( m_PresentParameters.MultiSampleType != D3DMULTISAMPLE_NONE );
	if ( g_pHardwareConfig->GetHDRType() == HDR_TYPE_FLOAT )
	{
		bAntialiasing = false;
	}
	SetRenderState( D3DRS_MULTISAMPLEANTIALIAS, bAntialiasing );
	
	// Anisotropic filtering is disabled by default
	if ( bFullReset )
	{
		SetAnisotropicLevel( 1 );
	}
	
	int i;
	for ( i = 0; i < g_pHardwareConfig->ActualCaps().m_NumTextureStages; ++i )
	{
		TextureStage(i).m_TextureTransformFlags = D3DTTFF_DISABLE;
		TextureStage(i).m_BumpEnvMat00 = 1.0f;
		TextureStage(i).m_BumpEnvMat01 = 0.0f;
		TextureStage(i).m_BumpEnvMat10 = 0.0f;
		TextureStage(i).m_BumpEnvMat11 = 1.0f;

		SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, TextureStage(i).m_TextureTransformFlags );
		SetTextureStageState( i, D3DTSS_BUMPENVMAT00, *( ( LPDWORD ) (&TextureStage(i).m_BumpEnvMat00) ) );
		SetTextureStageState( i, D3DTSS_BUMPENVMAT01, *( ( LPDWORD ) (&TextureStage(i).m_BumpEnvMat01) ) );
		SetTextureStageState( i, D3DTSS_BUMPENVMAT10, *( ( LPDWORD ) (&TextureStage(i).m_BumpEnvMat10) ) );
		SetTextureStageState( i, D3DTSS_BUMPENVMAT11, *( ( LPDWORD ) (&TextureStage(i).m_BumpEnvMat11) ) );
	}

	for ( i = 0; i < g_pHardwareConfig->ActualCaps().m_NumSamplers; ++i )
	{
		SamplerState(i).m_BoundTexture = INVALID_SHADERAPI_TEXTURE_HANDLE;						  
		SamplerState(i).m_UTexWrap = D3DTADDRESS_WRAP;
		SamplerState(i).m_VTexWrap = D3DTADDRESS_WRAP;
		SamplerState(i).m_WTexWrap = D3DTADDRESS_WRAP;
		SamplerState(i).m_MagFilter = D3DTEXF_POINT;
		SamplerState(i).m_MinFilter = D3DTEXF_POINT;
		SamplerState(i).m_MipFilter = D3DTEXF_NONE;
		SamplerState(i).m_FinestMipmapLevel = 0;
		SamplerState(i).m_LodBias = 0.0f;
		SamplerState(i).m_TextureEnable = false;
		SamplerState(i).m_SRGBReadEnable = false;

		// Just some initial state...
		Dx9Device()->SetTexture( i, 0 );

		SetSamplerState( i, D3DSAMP_ADDRESSU, SamplerState(i).m_UTexWrap );
		SetSamplerState( i, D3DSAMP_ADDRESSV, SamplerState(i).m_VTexWrap ); 
		SetSamplerState( i, D3DSAMP_ADDRESSW, SamplerState(i).m_WTexWrap ); 
		SetSamplerState( i, D3DSAMP_MINFILTER, SamplerState(i).m_MinFilter );
		SetSamplerState( i, D3DSAMP_MAGFILTER, SamplerState(i).m_MagFilter );
		SetSamplerState( i, D3DSAMP_MIPFILTER, SamplerState(i).m_MipFilter );
		SetSamplerState( i, D3DSAMP_MAXMIPLEVEL, SamplerState(i).m_FinestMipmapLevel );
		SetSamplerState( i, D3DSAMP_MIPMAPLODBIAS, SamplerState(i).m_LodBias );

		SetSamplerState( i, D3DSAMP_BORDERCOLOR, RGB( 0,0,0 ) );
	}

	// FIXME!!!!! : This barfs with the debug runtime on 6800.
	for( i = 0; i < g_pHardwareConfig->ActualCaps().m_nVertexTextureCount; i++ )
	{
		m_DynamicState.m_VertexTextureState[i].m_BoundTexture = INVALID_SHADERAPI_TEXTURE_HANDLE;
		Dx9Device()->SetTexture( D3DVERTEXTEXTURESAMPLER0 + i, NULL );

		m_DynamicState.m_VertexTextureState[i].m_UTexWrap = D3DTADDRESS_CLAMP;
		m_DynamicState.m_VertexTextureState[i].m_VTexWrap = D3DTADDRESS_CLAMP;
//		m_DynamicState.m_VertexTextureState[i].m_WTexWrap = D3DTADDRESS_CLAMP;
		m_DynamicState.m_VertexTextureState[i].m_MinFilter = D3DTEXF_POINT;
		m_DynamicState.m_VertexTextureState[i].m_MagFilter = D3DTEXF_POINT;
		m_DynamicState.m_VertexTextureState[i].m_MipFilter = D3DTEXF_POINT;
		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSU, m_DynamicState.m_VertexTextureState[i].m_UTexWrap );
		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSV, m_DynamicState.m_VertexTextureState[i].m_VTexWrap );
//		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_ADDRESSW, m_DynamicState.m_VertexTextureState[i].m_WTexWrap );
		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MINFILTER, m_DynamicState.m_VertexTextureState[i].m_MinFilter );
		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MAGFILTER, m_DynamicState.m_VertexTextureState[i].m_MagFilter );
		SetSamplerState( D3DVERTEXTEXTURESAMPLER0 + i, D3DSAMP_MIPFILTER, m_DynamicState.m_VertexTextureState[i].m_MipFilter );
	}

	m_DynamicState.m_NumLights = 0;
	for ( i = 0; i < MAX_NUM_LIGHTS; ++i)
	{
		m_DynamicState.m_LightEnable[i] = false;
		m_DynamicState.m_LightChanged[i] = STATE_CHANGED;
		m_DynamicState.m_LightEnableChanged[i] = STATE_CHANGED;
	}

	for ( i = 0; i < NUM_MATRIX_MODES; ++i)
	{
		// By setting this to *not* be identity, we force an update...
		m_DynamicState.m_TransformType[i] = TRANSFORM_IS_GENERAL;
		m_DynamicState.m_TransformChanged[i] = STATE_CHANGED;
	}

	// set the board state to match the default state
	m_TransitionTable.UseDefaultState();

	// Set the default render state
	SetDefaultState();

	// Constant for all time
	SetSupportedRenderState( D3DRS_CLIPPING, TRUE );
	SetSupportedRenderState( D3DRS_LOCALVIEWER, TRUE );
	SetSupportedRenderState( D3DRS_POINTSCALEENABLE, FALSE );
	SetSupportedRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL );
	SetSupportedRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_COLOR2 );
	SetSupportedRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
	SetSupportedRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
	SetSupportedRenderState( D3DRS_COLORVERTEX, TRUE ); // This defaults to true anyways. . . 

	// Set a default identity material.
	SetDefaultMaterial();

#if 0
	float fBias = -1.0f;
	SetTextureStageState( 0, D3DTSS_MIPMAPLODBIAS, *( ( LPDWORD ) (&fBias) ) );
	SetTextureStageState( 1, D3DTSS_MIPMAPLODBIAS, *( ( LPDWORD ) (&fBias) ) );
	SetTextureStageState( 2, D3DTSS_MIPMAPLODBIAS, *( ( LPDWORD ) (&fBias) ) );
	SetTextureStageState( 3, D3DTSS_MIPMAPLODBIAS, *( ( LPDWORD ) (&fBias) ) );
#endif

	if ( bFullReset )
	{
		// Set the modelview matrix to identity too
		for ( i = 0; i < NUM_MODEL_TRANSFORMS; ++i )
		{
			SetIdentityMatrix( m_boneMatrix[i] );
		}
		MatrixMode( MATERIAL_VIEW );
		LoadIdentity();
		MatrixMode( MATERIAL_PROJECTION );
		LoadIdentity();
	}

#ifdef _X360
	m_DynamicState.m_bBuffer2Frames = m_bBuffer2FramesAhead;
	SetRenderState( D3DRS_BUFFER2FRAMES, m_DynamicState.m_bBuffer2Frames );
#endif

	m_DynamicState.m_Viewport.X = m_DynamicState.m_Viewport.Y = 
		m_DynamicState.m_Viewport.Width = m_DynamicState.m_Viewport.Height = 0xFFFFFFFF;
	m_DynamicState.m_Viewport.MinZ = m_DynamicState.m_Viewport.MaxZ = 0.0;

	// Be sure scissoring is off
	m_DynamicState.m_RenderState[D3DRS_SCISSORTESTENABLE] = FALSE;
	SetRenderState( D3DRS_SCISSORTESTENABLE, FALSE );
	m_DynamicState.m_ScissorRect.left   = -1;
	m_DynamicState.m_ScissorRect.top    = -1;
	m_DynamicState.m_ScissorRect.right  = -1;
	m_DynamicState.m_ScissorRect.bottom = -1;

	//SetHeightClipMode( MATERIAL_HEIGHTCLIPMODE_DISABLE );
	EnableFastClip( false );
	float fFakePlane[4];
	unsigned int iFakePlaneVal = 0xFFFFFFFF;
	fFakePlane[0] = fFakePlane[1] = fFakePlane[2] = fFakePlane[3] = *((float *)&iFakePlaneVal);
	SetFastClipPlane( fFakePlane ); //doing this to better wire up plane change detection

	float zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f };

	// Make sure that our state is dirty.
	m_DynamicState.m_UserClipPlaneEnabled = 0;
	m_DynamicState.m_UserClipPlaneChanged = 0;
	m_DynamicState.m_UserClipLastUpdatedUsingFixedFunction = false;
	for( i = 0; i < g_pHardwareConfig->MaxUserClipPlanes(); i++ )
	{
		// Make sure that our state is dirty.
		m_DynamicState.m_UserClipPlaneWorld[i][0] = -1.0f;
		m_DynamicState.m_UserClipPlaneProj[i][0] = -9999.0f;
		m_DynamicState.m_UserClipPlaneEnabled |= ( 1 << i );
		SetClipPlane( i, zero );
		EnableClipPlane( i, false );
		Assert( m_DynamicState.m_UserClipPlaneEnabled == 0 );
	}
	Assert( m_DynamicState.m_UserClipPlaneChanged == ((1 << g_pHardwareConfig->MaxUserClipPlanes()) - 1) );

	m_DynamicState.m_FastClipEnabled = false;
	m_DynamicState.m_bFastClipPlaneChanged = true;

	// User clip override
	m_DynamicState.m_bUserClipTransformOverride = false;
	D3DXMatrixIdentity( &m_DynamicState.m_UserClipTransform );

	// Viewport defaults to the window size
	RECT windowRect;
#if !defined( DX_TO_GL_ABSTRACTION )
	GetClientRect( (HWND)m_hWnd, &windowRect );
#else
	toglGetClientRect( (VD3DHWND)m_hWnd, &windowRect );
#endif

	ShaderViewport_t viewport;
	viewport.Init( windowRect.left, windowRect.top, 
		windowRect.right - windowRect.left,	windowRect.bottom - windowRect.top );
	SetViewports( 1, &viewport );

	// No render mesh
	m_pRenderMesh = 0;

	// Reset cached vertex decl
	m_DynamicState.m_pVertexDecl = NULL;
	
	// Reset the render target to be the normal backbuffer
	if ( IsX360() )
	{
		m_hCachedRenderTarget = INVALID_SHADERAPI_TEXTURE_HANDLE;
		m_bUsingSRGBRenderTarget = false;
	}
	AcquireInternalRenderTargets();
	SetRenderTarget();

	// Maintain vertex + pixel shader constant buffers
	Vector4D *pVectorPixelShaderConstants = m_DesiredState.m_pVectorPixelShaderConstant;
	int *pBooleanPixelShaderConstants = m_DesiredState.m_pBooleanPixelShaderConstant;
	IntVector4D *pIntegerPixelShaderConstants = m_DesiredState.m_pIntegerPixelShaderConstant;
	Vector4D *pVectorVertexShaderConstants = m_DesiredState.m_pVectorVertexShaderConstant;
	int *pBooleanVertexShaderConstants = m_DesiredState.m_pBooleanVertexShaderConstant;
	IntVector4D *pIntegerVertexShaderConstants = m_DesiredState.m_pIntegerVertexShaderConstant;
	m_DesiredState = m_DynamicState;
	m_DesiredState.m_pVectorPixelShaderConstant = pVectorPixelShaderConstants;
	m_DesiredState.m_pBooleanPixelShaderConstant = pBooleanPixelShaderConstants;
	m_DesiredState.m_pIntegerPixelShaderConstant = pIntegerPixelShaderConstants;
	m_DesiredState.m_pVectorVertexShaderConstant = pVectorVertexShaderConstants;
	m_DesiredState.m_pBooleanVertexShaderConstant = pBooleanVertexShaderConstants;
	m_DesiredState.m_pIntegerVertexShaderConstant = pIntegerVertexShaderConstants;
	if ( g_pHardwareConfig->Caps().m_SupportsPixelShaders )
	{
		if ( !bFullReset )
		{
			//Full resets init the values to defaults. Normal resets just leave them dirty.
			if( g_pHardwareConfig->Caps().m_NumVertexShaderConstants != 0 )
				SetVertexShaderConstant( 0, m_DesiredState.m_pVectorVertexShaderConstant[0].Base(), IsX360() ? 217 : g_pHardwareConfig->Caps().m_NumVertexShaderConstants, true ); //217 on X360 to play nice with fast blatting code
			
			if( g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants != 0 )
				SetIntegerVertexShaderConstant( 0, (int *)m_DesiredState.m_pIntegerVertexShaderConstant, g_pHardwareConfig->Caps().m_NumIntegerVertexShaderConstants, true );
			
			if( g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants != 0 )
				SetBooleanVertexShaderConstant( 0, m_DesiredState.m_pBooleanVertexShaderConstant, g_pHardwareConfig->Caps().m_NumBooleanVertexShaderConstants, true );


			if( g_pHardwareConfig->Caps().m_NumPixelShaderConstants != 0 )
				SetPixelShaderConstant( 0, m_DesiredState.m_pVectorPixelShaderConstant[0].Base(), g_pHardwareConfig->Caps().m_NumPixelShaderConstants, true );

			if( g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants != 0 )
				SetIntegerPixelShaderConstant( 0, (int *)m_DesiredState.m_pIntegerPixelShaderConstant, g_pHardwareConfig->Caps().m_NumIntegerPixelShaderConstants, true );

			if( g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants != 0 )
				SetBooleanPixelShaderConstant( 0, m_DesiredState.m_pBooleanPixelShaderConstant, g_pHardwareConfig->Caps().m_NumBooleanPixelShaderConstants, true );
		}
	}

	RECORD_DEBUG_STRING( "END ResetRenderState" );

	m_bResettingRenderState = false;
}

//-----------------------------------------------------------------------------
// Sets the default render state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetDefaultState()
{
	LOCK_SHADERAPI();

	// NOTE: This used to be in the material system, but I want to avoid all the per pass/batch
	// virtual function calls.
	int numTextureStages = g_pHardwareConfig->GetTextureStageCount();

	// FIXME: This is a brutal hack. We only need to load these transforms for fixed-function
	// hardware. Cap the max here to 4.
	if ( IsPC() )
	{
		numTextureStages = min( numTextureStages, 4 );
		int i;
		for( i = 0; i < numTextureStages; i++ )
		{
			CShaderAPIDx8::DisableTextureTransform( (TextureStage_t)i );
			CShaderAPIDx8::MatrixMode( (MaterialMatrixMode_t)(MATERIAL_TEXTURE0 + i) );
			CShaderAPIDx8::LoadIdentity( );
		}
	}
	CShaderAPIDx8::MatrixMode( MATERIAL_MODEL );

	CShaderAPIDx8::Color4ub( 255, 255, 255, 255 );
	CShaderAPIDx8::ShadeMode( SHADER_SMOOTH );
	CShaderAPIDx8::SetVertexShaderIndex( );
	CShaderAPIDx8::SetPixelShaderIndex( );

	MeshMgr()->MarkUnusedVertexFields( 0, 0, NULL );
}



//-----------------------------------------------------------------------------
//
// Methods related to vertex format
//
//-----------------------------------------------------------------------------


//-----------------------------------------------------------------------------
// Sets the vertex
//-----------------------------------------------------------------------------
inline void CShaderAPIDx8::SetVertexDecl( VertexFormat_t vertexFormat, bool bHasColorMesh, bool bUsingFlex, bool bUsingMorph )
{
	VPROF("CShaderAPIDx8::SetVertexDecl");
	IDirect3DVertexDeclaration9 *pDecl = FindOrCreateVertexDecl( vertexFormat, bHasColorMesh, bUsingFlex, bUsingMorph );
	Assert( pDecl );

	if ( ( pDecl != m_DynamicState.m_pVertexDecl ) && pDecl )
	{
		Dx9Device()->SetVertexDeclaration( pDecl );
		m_DynamicState.m_pVertexDecl = pDecl;
	}
}



//-----------------------------------------------------------------------------
//
// Methods related to vertex buffers
//
//-----------------------------------------------------------------------------

IMesh *CShaderAPIDx8::GetFlexMesh()
{
	LOCK_SHADERAPI();
	return MeshMgr()->GetFlexMesh();
}

//-----------------------------------------------------------------------------
// Gets the dynamic mesh
//-----------------------------------------------------------------------------
IMesh* CShaderAPIDx8::GetDynamicMesh( IMaterial* pMaterial, int nHWSkinBoneCount, bool buffered,
								IMesh* pVertexOverride, IMesh* pIndexOverride )
{
	Assert( (pMaterial == NULL) || ((IMaterialInternal *)pMaterial)->IsRealTimeVersion() );

	LOCK_SHADERAPI();
	return MeshMgr()->GetDynamicMesh( pMaterial, 0, nHWSkinBoneCount, buffered, pVertexOverride, pIndexOverride );
}

IMesh* CShaderAPIDx8::GetDynamicMeshEx( IMaterial* pMaterial, VertexFormat_t vertexFormat, int nHWSkinBoneCount, 
	bool bBuffered, IMesh* pVertexOverride, IMesh* pIndexOverride )
{
	Assert( (pMaterial == NULL) || ((IMaterialInternal *)pMaterial)->IsRealTimeVersion() );

	LOCK_SHADERAPI();
	return MeshMgr()->GetDynamicMesh( pMaterial, vertexFormat, nHWSkinBoneCount, bBuffered, pVertexOverride, pIndexOverride );
}

//-----------------------------------------------------------------------------
// Returns the number of vertices we can render using the dynamic mesh
//-----------------------------------------------------------------------------
void CShaderAPIDx8::GetMaxToRender( IMesh *pMesh, bool bMaxUntilFlush, int *pMaxVerts, int *pMaxIndices )
{
	LOCK_SHADERAPI();
	MeshMgr()->GetMaxToRender( pMesh, bMaxUntilFlush, pMaxVerts, pMaxIndices );
}

int CShaderAPIDx8::GetMaxVerticesToRender( IMaterial *pMaterial )
{
	pMaterial = ((IMaterialInternal *)pMaterial)->GetRealTimeVersion(); //always work with the realtime version internally

	LOCK_SHADERAPI();
	return MeshMgr()->GetMaxVerticesToRender( pMaterial );
}

int CShaderAPIDx8::GetMaxIndicesToRender( )
{
	LOCK_SHADERAPI();
	return MeshMgr()->GetMaxIndicesToRender( );
}

void CShaderAPIDx8::MarkUnusedVertexFields( unsigned int nFlags, int nTexCoordCount, bool *pUnusedTexCoords )
{
	LOCK_SHADERAPI();
	MeshMgr()->MarkUnusedVertexFields( nFlags, nTexCoordCount, pUnusedTexCoords );
}

//-----------------------------------------------------------------------------
// Draws the mesh
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DrawMesh( CMeshBase *pMesh )
{
	VPROF("CShaderAPIDx8::DrawMesh");
	if ( ShaderUtil()->GetConfig().m_bSuppressRendering )
		return;

#if defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD )
	PIXifyName( s_pPIXMaterialName, sizeof( s_pPIXMaterialName ), m_pMaterial->GetName() );
	BeginPIXEvent( PIX_VALVE_ORANGE, s_pPIXMaterialName );
#endif

	m_pRenderMesh = pMesh;
	VertexFormat_t vertexFormat = m_pRenderMesh->GetVertexFormat();
	SetVertexDecl( vertexFormat, m_pRenderMesh->HasColorMesh(), m_pRenderMesh->HasFlexMesh(), m_pMaterial->IsUsingVertexID() );
	CommitStateChanges();
	Assert( m_pRenderMesh && m_pMaterial );
	m_pMaterial->DrawMesh( CompressionType( vertexFormat ) );
	m_pRenderMesh = NULL;

#if defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD )
	EndPIXEvent();
#endif
}

void CShaderAPIDx8::DrawWithVertexAndIndexBuffers( void )
{
	VPROF("CShaderAPIDx8::DrawWithVertexAndIndexBuffers");
	if ( ShaderUtil()->GetConfig().m_bSuppressRendering )
		return;

#if defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD )
	PIXifyName( s_pPIXMaterialName, sizeof( s_pPIXMaterialName ), m_pMaterial->GetName());
	BeginPIXEvent( PIX_VALVE_ORANGE, s_pPIXMaterialName );
#endif

//	m_pRenderMesh = pMesh;
	// FIXME: need to make this deal with multiple streams, etc.
	VertexFormat_t vertexFormat = MeshMgr()->GetCurrentVertexFormat();
	SetVertexDecl( vertexFormat, false /*m_pRenderMesh->HasColorMesh()*/, 
		false /*m_pRenderMesh->HasFlexMesh()*/, false /*m_pRenderMesh->IsUsingMorphData()*/ );
	CommitStateChanges();
	if ( m_pMaterial )
	{
		m_pMaterial->DrawMesh( CompressionType( vertexFormat ) );
	}
	else
	{
		MeshMgr()->RenderPassWithVertexAndIndexBuffers();
	}
//	m_pRenderMesh = NULL;

#if defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD )
	EndPIXEvent();
#endif
}

//-----------------------------------------------------------------------------
// Discards the vertex buffers
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DiscardVertexBuffers()
{
	MeshMgr()->DiscardVertexBuffers();
}

void CShaderAPIDx8::ForceHardwareSync_WithManagedTexture()
{
	if ( IsX360() || !m_pFrameSyncTexture )
		return;

	// Set the default state for everything so we don't get more than we ask for here!
	SetDefaultState();

	D3DLOCKED_RECT rect;

	tmZone( TELEMETRY_LEVEL1, TMZF_NONE, "%s", __FUNCTION__ );

	HRESULT hr = m_pFrameSyncTexture->LockRect( 0, &rect, NULL, 0 );
	if ( SUCCEEDED( hr ) )
	{
		// modify..
		unsigned long *pData = (unsigned long*)rect.pBits;
		(*pData)++; 

		m_pFrameSyncTexture->UnlockRect( 0 );

		// Now draw something with this texture.
		DWORD iStage = 0;
		IDirect3DBaseTexture9 *pOldTexture;
		hr = Dx9Device()->GetTexture( iStage, &pOldTexture );
		if ( SUCCEEDED( hr ) )
		{
			Dx9Device()->SetTexture( iStage, m_pFrameSyncTexture );
			// Remember the old FVF.
			DWORD oldFVF;
			hr = Dx9Device()->GetFVF( &oldFVF );
			if ( SUCCEEDED( hr ) )
			{
				// Set the new FVF.
				Dx9Device()->SetFVF( D3DFVF_XYZ );
				// Now, draw the simplest primitive D3D has ever seen.
				unsigned short indices[3] = { 0, 1, 2 };
				Vector verts[3] = {vec3_origin, vec3_origin, vec3_origin};
				
				tmZone( TELEMETRY_LEVEL1, TMZF_NONE, "DrawIndexedPrimitiveUP" );

				Dx9Device()->DrawIndexedPrimitiveUP(
					D3DPT_TRIANGLELIST,
					0,				// Min vertex index
					3,				// Num vertices used
					1,				// # primitives
					indices,		// indices
					D3DFMT_INDEX16,	// index format
					verts,			// Vertices
					sizeof( Vector )// Vertex stride
					);
				
				Dx9Device()->SetFVF( oldFVF );
			}
			Dx9Device()->SetTexture( iStage, pOldTexture );
		}
	}
	// If this assert fails, then we failed somewhere above.
	AssertOnce( SUCCEEDED( hr ) );
}

void CShaderAPIDx8::UpdateFrameSyncQuery( int queryIndex, bool bIssue )
{
	Assert(queryIndex < NUM_FRAME_SYNC_QUERIES);
	// wait if already issued
	if ( m_bQueryIssued[queryIndex] )
	{
		tmZone( TELEMETRY_LEVEL1, TMZF_NONE, "%s", __FUNCTION__ );

		double flStartTime = Plat_FloatTime();
		BOOL dummyData = 0;
		HRESULT hr = S_OK;
		// NOTE: This fix helps out motherboards that are a little freaky.
		// On such boards, sometimes the driver has to reset itself (an event which takes several seconds)
		// and when that happens, the frame sync query object gets lost
		for (;;)
		{
			hr = m_pFrameSyncQueryObject[queryIndex]->GetData( &dummyData, sizeof( dummyData ), D3DGETDATA_FLUSH );
			if ( hr != S_FALSE )
				break;
			double flCurrTime = Plat_FloatTime();
			// don't wait more than 200ms (5fps) for these
			if ( flCurrTime - flStartTime > 0.200f )
				break;
			// Avoid burning a full core while waiting for the query. Spinning can actually harm performance
			// because there might be driver threads that are trying to do work that end up starved, and the
			// power drawn by the CPU may take away from the power available to the integrated graphics chip.
			// A sleep of one millisecond should never be long enough to affect performance, especially since
			// this should only trigger when the CPU is already ahead of the GPU.
			// On L4D2/TF2 in GL mode this spinning was causing slowdowns.
			ThreadSleep( 1 );
		}
		m_bQueryIssued[queryIndex] = false;
		Assert(hr == S_OK || hr == D3DERR_DEVICELOST);

		if ( hr == D3DERR_DEVICELOST )
		{
			MarkDeviceLost( );
			return;
		}
	}
	if ( bIssue )
	{
		m_pFrameSyncQueryObject[queryIndex]->Issue( D3DISSUE_END );
		m_bQueryIssued[queryIndex] = true;
	}
}

void CShaderAPIDx8::ForceHardwareSync( void )
{
	LOCK_SHADERAPI();
	VPROF( "CShaderAPIDx8::ForceHardwareSync" );

#ifdef DX_TO_GL_ABSTRACTION
	if ( true )
#else
	if ( !mat_frame_sync_enable.GetInt() )
#endif
		return;

	// need to flush the dynamic buffer	and make sure the entire image is there
	FlushBufferedPrimitives();

	RECORD_COMMAND( DX8_HARDWARE_SYNC, 0 );

#if !defined( _X360 )
	// How do you query dx9 for how many frames behind the hardware is or, alternatively, how do you tell the hardware to never be more than N frames behind?
	// 1) The old QueryPendingFrameCount design was removed.  It was
	// a simple transaction with the driver through the 
	// GetDriverState, trivial for the drivers to lie.  We came up 
	// with a much better scheme for tracking pending frames where 
	// the driver can not lie without a possible performance loss:  
	// use the asynchronous query system with D3DQUERYTYPE_EVENT and 
	// data size 0.  When GetData returns S_OK for the query, you 
	// know that frame has finished.
	if ( mat_frame_sync_force_texture.GetBool() )
	{
		ForceHardwareSync_WithManagedTexture();
	}
	else if ( m_pFrameSyncQueryObject[0] )
	{
		// FIXME: Could install a callback into the materialsystem to do something while waiting for
		// the frame to finish (update sound, etc.)
		
		// Disable VCR mode here or else it'll screw up (and we don't really care if this part plays back in exactly the same amount of time).
		VCRSetEnabled( false );

		m_currentSyncQuery ++;
		if ( m_currentSyncQuery >= ARRAYSIZE(m_pFrameSyncQueryObject) )
		{
			m_currentSyncQuery = 0;
		}
		double fStart = Plat_FloatTime();
		int waitIndex = ((m_currentSyncQuery + NUM_FRAME_SYNC_QUERIES) - (NUM_FRAME_SYNC_FRAMES_LATENCY+1)) % NUM_FRAME_SYNC_QUERIES;
		UpdateFrameSyncQuery( waitIndex, false );
		UpdateFrameSyncQuery( m_currentSyncQuery, true );
		VCRSetEnabled( true );
	} 
#else
	DWORD hFence = Dx9Device()->InsertFence();
	Dx9Device()->BlockOnFence( hFence );
#endif
}


//-----------------------------------------------------------------------------
// Needs render state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::QueueResetRenderState()
{
	m_bResetRenderStateNeeded = true;
}


//-----------------------------------------------------------------------------
// Use this to begin and end the frame
//-----------------------------------------------------------------------------
void CShaderAPIDx8::BeginFrame()
{
	LOCK_SHADERAPI();

	if ( m_bResetRenderStateNeeded )
	{
		ResetRenderState( false );
		m_bResetRenderStateNeeded = false;
	}

#if ALLOW_SMP_ACCESS
	Dx9Device()->SetASyncMode( mat_use_smp.GetInt() != 0 );
#endif

	++m_CurrentFrame;
	m_nTextureMemoryUsedLastFrame = 0;
}

void CShaderAPIDx8::EndFrame()
{
	LOCK_SHADERAPI();

#if !defined( _X360 )
	MEMCHECK;
#endif

	ExportTextureList();
}


void CShaderAPIDx8::AddBufferToTextureList( const char *pName, D3DSURFACE_DESC &desc )
{
//	ImageFormat imageFormat;
//	imageFormat = ImageLoader::D3DFormatToImageFormat( desc.Format );
//	if( imageFormat < 0 )
//	{
//		Assert( 0 );
//		return;
//	}
	KeyValues *pSubKey = m_pDebugTextureList->CreateNewKey();
	pSubKey->SetString( "Name", pName );
	pSubKey->SetString( "TexGroup", TEXTURE_GROUP_RENDER_TARGET );
	pSubKey->SetInt( "Size", 
//		ImageLoader::SizeInBytes( imageFormat ) * desc.Width * desc.Height );
		4 * desc.Width * desc.Height );
	pSubKey->SetString( "Format", "32 bit buffer (hack)" );//ImageLoader::GetName( imageFormat ) );
	pSubKey->SetInt( "Width", desc.Width );
	pSubKey->SetInt( "Height", desc.Height );
	
	pSubKey->SetInt( "BindsMax", 1 );
	pSubKey->SetInt( "BindsFrame", 1 );
}

void CShaderAPIDx8::ExportTextureList()
{
	if ( !m_bEnableDebugTextureList )
		return;

	if ( !m_pBackBufferSurface || !m_pZBufferSurface )
		// Device vanished...
		return;

	m_nDebugDataExportFrame = m_CurrentFrame;

	if ( IsPC() || !IsX360() )
	{
		if ( m_pDebugTextureList )
			m_pDebugTextureList->deleteThis();

		m_pDebugTextureList = new KeyValues( "TextureList" );

		m_nTextureMemoryUsedTotal = 0;
		m_nTextureMemoryUsedPicMip1 = 0;
		m_nTextureMemoryUsedPicMip2 = 0;
		for ( ShaderAPITextureHandle_t hTexture = m_Textures.Head() ; hTexture != m_Textures.InvalidIndex(); hTexture = m_Textures.Next( hTexture ) )
		{
			Texture_t &tex = m_Textures[hTexture];
		
			if ( !( tex.m_Flags & Texture_t::IS_ALLOCATED ) )
				continue;

			// Compute total texture memory usage
			m_nTextureMemoryUsedTotal += tex.GetMemUsage();

			// Compute picmip memory usage
			{
				int numBytes = tex.GetMemUsage();

				if ( tex.m_NumLevels > 1 )
				{
					if ( tex.GetWidth() > 4 || tex.GetHeight() > 4 || tex.GetDepth() > 4 )
					{
						int topmipsize = ImageLoader::GetMemRequired( tex.GetWidth(), tex.GetHeight(), tex.GetDepth(), tex.GetImageFormat(), false );
						numBytes -= topmipsize;

						m_nTextureMemoryUsedPicMip1 += numBytes;

						if ( tex.GetWidth() > 8 || tex.GetHeight() > 8 || tex.GetDepth() > 8 )
						{
							int othermipsizeRatio = ( ( tex.GetWidth() > 8 ) ? 2 : 1 ) * ( ( tex.GetHeight() > 8 ) ? 2 : 1 ) * ( ( tex.GetDepth() > 8 ) ? 2 : 1 );
							int othermipsize = topmipsize / othermipsizeRatio;
							numBytes -= othermipsize;
						}

						m_nTextureMemoryUsedPicMip1 += numBytes;
					}
					else
					{
						m_nTextureMemoryUsedPicMip1 += numBytes;
						m_nTextureMemoryUsedPicMip2 += numBytes;
					}
				}
				else
				{
					m_nTextureMemoryUsedPicMip1 += numBytes;
					m_nTextureMemoryUsedPicMip2 += numBytes;
				}
			}

			if ( !m_bDebugGetAllTextures &&
				  tex.m_LastBoundFrame != m_CurrentFrame )
				continue;

			if ( tex.m_LastBoundFrame != m_CurrentFrame )
				tex.m_nTimesBoundThisFrame = 0;

			KeyValues *pSubKey = m_pDebugTextureList->CreateNewKey();
			pSubKey->SetString( "Name", tex.m_DebugName.String() );
			pSubKey->SetString( "TexGroup", tex.m_TextureGroupName.String() );
			pSubKey->SetInt( "Size", tex.GetMemUsage() );
			if ( tex.GetCount() > 1 )
				pSubKey->SetInt( "Count", tex.GetCount() );
			pSubKey->SetString( "Format", ImageLoader::GetName( tex.GetImageFormat() ) );
			pSubKey->SetInt( "Width", tex.GetWidth() );
			pSubKey->SetInt( "Height", tex.GetHeight() );

			pSubKey->SetInt( "BindsMax", tex.m_nTimesBoundMax );
			pSubKey->SetInt( "BindsFrame", tex.m_nTimesBoundThisFrame );
		}

		D3DSURFACE_DESC desc;
		m_pBackBufferSurface->GetDesc( &desc );
		AddBufferToTextureList( "BACKBUFFER", desc );
		AddBufferToTextureList( "FRONTBUFFER", desc );
	//	ImageFormat imageFormat = ImageLoader::D3DFormatToImageFormat( desc.Format );
	//	if( imageFormat >= 0 )
		{
			VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_frame_" TEXTURE_GROUP_RENDER_TARGET, 
				COUNTER_GROUP_TEXTURE_PER_FRAME, 
	//			ImageLoader::SizeInBytes( imageFormat ) * desc.Width * desc.Height );
				2 * 4 * desc.Width * desc.Height ); // hack (times 2 for front and back buffer)
		}

		m_pZBufferSurface->GetDesc( &desc );
		AddBufferToTextureList( "DEPTHBUFFER", desc );
	//	imageFormat = ImageLoader::D3DFormatToImageFormat( desc.Format );
	//	if( imageFormat >= 0 )
		{
			VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_frame_" TEXTURE_GROUP_RENDER_TARGET, 
				COUNTER_GROUP_TEXTURE_PER_FRAME, 
	//			ImageLoader::SizeInBytes( imageFormat ) * desc.Width * desc.Height );
				4 * desc.Width * desc.Height ); // hack
		}
	}

#if defined( _X360 )
	// toggle to do one shot transmission
	m_bEnableDebugTextureList = false;

	int numTextures = m_Textures.Count() + 3;
	xTextureList_t* pXTextureList = (xTextureList_t *)_alloca( numTextures * sizeof( xTextureList_t ) );
	memset( pXTextureList, 0, numTextures * sizeof( xTextureList_t ) );

	numTextures = 0;
	for ( ShaderAPITextureHandle_t hTexture = m_Textures.Head() ; hTexture != m_Textures.InvalidIndex(); hTexture = m_Textures.Next( hTexture ) )
	{
		Texture_t &tex = m_Textures[hTexture];
	
		if ( !m_bDebugGetAllTextures && tex.m_LastBoundFrame != m_CurrentFrame )
		{
			continue;
		}
		if ( !( tex.m_Flags & Texture_t::IS_ALLOCATED ) )
		{
			continue;
		}

		int refCount;
		if ( tex.m_Flags & Texture_t::IS_DEPTH_STENCIL )
		{
			// interface forces us to ignore these
			refCount =  -1;
		}
		else
		{
			refCount = GetD3DTextureRefCount( CShaderAPIDx8::GetD3DTexture( hTexture ) );
		}

		pXTextureList[numTextures].pName = tex.m_DebugName.String();
		pXTextureList[numTextures].size = tex.m_SizeBytes * tex.m_NumCopies;
		pXTextureList[numTextures].pGroupName = tex.m_TextureGroupName.String();
		pXTextureList[numTextures].pFormatName = D3DFormatName( ImageLoader::ImageFormatToD3DFormat( tex.GetImageFormat() ) );
		pXTextureList[numTextures].width = tex.GetWidth();
		pXTextureList[numTextures].height = tex.GetHeight();
		pXTextureList[numTextures].depth = tex.GetDepth();
		pXTextureList[numTextures].numLevels = tex.m_NumLevels;
		pXTextureList[numTextures].binds = tex.m_nTimesBoundThisFrame;
		pXTextureList[numTextures].refCount = refCount;
		pXTextureList[numTextures].edram = ( tex.m_Flags & Texture_t::IS_RENDER_TARGET_SURFACE ) != 0;
		pXTextureList[numTextures].procedural = tex.m_NumCopies > 1;
		pXTextureList[numTextures].final = ( tex.m_Flags & Texture_t::IS_FINALIZED ) != 0;
		pXTextureList[numTextures].failed = ( tex.m_Flags & Texture_t::IS_FAILED ) != 0;
		numTextures++;
	}

	// build special entries for implicit surfaces/textures
	D3DSURFACE_DESC desc;
	m_pBackBufferSurface->GetDesc( &desc );
	int size = ImageLoader::GetMemRequired( 
		desc.Width,
		desc.Height,
		0,
		ImageLoader::D3DFormatToImageFormat( desc.Format ),
		false );
	pXTextureList[numTextures].pName = "_rt_BackBuffer";
	pXTextureList[numTextures].size = size;
	pXTextureList[numTextures].pGroupName = TEXTURE_GROUP_RENDER_TARGET_SURFACE;
	pXTextureList[numTextures].pFormatName = D3DFormatName( desc.Format );
	pXTextureList[numTextures].width = desc.Width;
	pXTextureList[numTextures].height = desc.Height;
	pXTextureList[numTextures].depth = 1;
	pXTextureList[numTextures].binds = 1;
	pXTextureList[numTextures].refCount = 1;
	pXTextureList[numTextures].sRGB = IS_D3DFORMAT_SRGB( desc.Format );
	pXTextureList[numTextures].edram = true;
	numTextures++;

	m_pZBufferSurface->GetDesc( &desc );
	pXTextureList[numTextures].pName = "_rt_DepthBuffer";
	pXTextureList[numTextures].size = size;
	pXTextureList[numTextures].pGroupName = TEXTURE_GROUP_RENDER_TARGET_SURFACE;
	pXTextureList[numTextures].pFormatName = D3DFormatName( desc.Format );
	pXTextureList[numTextures].width = desc.Width;
	pXTextureList[numTextures].height = desc.Height;
	pXTextureList[numTextures].depth = 1;
	pXTextureList[numTextures].binds = 1;
	pXTextureList[numTextures].refCount = 1;
	pXTextureList[numTextures].sRGB = IS_D3DFORMAT_SRGB( desc.Format );
	pXTextureList[numTextures].edram = true;
	numTextures++;

	// front buffer resides in DDR
	pXTextureList[numTextures].pName = "_rt_FrontBuffer";
	pXTextureList[numTextures].size = size;
	pXTextureList[numTextures].pGroupName = TEXTURE_GROUP_RENDER_TARGET;
	pXTextureList[numTextures].pFormatName = D3DFormatName( desc.Format );
	pXTextureList[numTextures].width = desc.Width;
	pXTextureList[numTextures].height = desc.Height;
	pXTextureList[numTextures].depth = 1;
	pXTextureList[numTextures].binds = 1;
	pXTextureList[numTextures].refCount = 1;
	pXTextureList[numTextures].sRGB = IS_D3DFORMAT_SRGB( desc.Format );
	numTextures++;

	int totalMemory = 0;
	for ( int i = 0; i < numTextures; i++ )
	{
		if ( pXTextureList[i].edram )
		{
			// skip edram based items
			continue;
		}
		totalMemory += pXTextureList[i].size;
	}
	Msg( "Total D3D Texture Memory: %.2f MB\n", (float)totalMemory/( 1024.0f * 1024.0f ) );

	// transmit to console
	XBX_rTextureList( numTextures, pXTextureList );
#endif
}


//-----------------------------------------------------------------------------
// Releases/reloads resources when other apps want some memory
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReleaseShaderObjects()
{
	ReleaseInternalRenderTargets();
	EvictManagedResourcesInternal();	

	// FIXME: Move into shaderdevice when textures move over.

#ifdef _DEBUG
	// Helps to find the unreleased textures.
	if ( TextureCount() > 0 )
	{
		ShaderAPITextureHandle_t hTexture;
		for ( hTexture = m_Textures.Head(); hTexture != m_Textures.InvalidIndex(); hTexture = m_Textures.Next( hTexture ) )
		{
			if ( GetTexture( hTexture ).m_NumCopies == 1 )
			{
				if ( GetTexture( hTexture ).GetTexture() )
				{
					Warning( "Didn't correctly clean up texture 0x%8.8x (%s)\n", hTexture, GetTexture( hTexture ).m_DebugName.String() ); 
				}
			}
			else
			{
				for ( int k = GetTexture( hTexture ).m_NumCopies; --k >= 0; )
				{
					if ( GetTexture( hTexture ).GetTexture( k ) != 0 )
					{
						Warning( "Didn't correctly clean up texture 0x%8.8x (%s)\n", hTexture, GetTexture( hTexture ).m_DebugName.String() ); 
						break;
					}
				}
			}
		}
	}
#endif

	Assert( TextureCount() == 0 );
}

void CShaderAPIDx8::RestoreShaderObjects()
{
	AcquireInternalRenderTargets();
	SetRenderTarget();
}


//--------------------------------------------------------------------
// PIX instrumentation routines
// Windows only for now.  Turn these on with PIX_INSTRUMENTATION above
//--------------------------------------------------------------------

#if 0	// hack versions for OSX to be able to PIX log even when not built debug...
void CShaderAPIDx8::BeginPIXEvent( unsigned long color, const char* szName )
	{
		LOCK_SHADERAPI();
		GLMBeginPIXEvent( szName );	// direct call no macro
		return;
	}

	void CShaderAPIDx8::EndPIXEvent( void )
	{
		LOCK_SHADERAPI();
		GLMEndPIXEvent();			// direct call no macro
		return;
	}
	
#else

#if defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD )
ConVar pix_break_on_event( "pix_break_on_event", "" );
#endif

void CShaderAPIDx8::BeginPIXEvent( unsigned long color, const char* szName )
{
#if ( defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD ) )
	//LOCK_SHADERAPI();

	const char *p = pix_break_on_event.GetString();
	if ( p && V_strlen( p ) )
	{
		if ( V_stristr( szName, p ) != NULL )
		{
			DebuggerBreak();
		}
	}

#if defined ( DX_TO_GL_ABSTRACTION )
	GLMBeginPIXEvent( szName );

#if defined( _WIN32 )
	// AMD PerfStudio integration: Call into D3D9.DLL's D3DPERF_BeginEvent() (this gets intercepted by PerfStudio even in GL mode).
	if ( g_pShaderDeviceMgrDx8->m_pBeginEvent )
	{
		wchar_t wszName[128];
		mbstowcs( wszName, szName, 128 );

		g_pShaderDeviceMgrDx8->m_pBeginEvent( 0x2F2F2F2F, wszName );
	}
#endif
#elif defined(_X360 )
#ifndef _DEBUG
	char szPIXEventName[32];
	PIXifyName( szPIXEventName, szName );
	PIXBeginNamedEvent( color, szPIXEventName );
#endif
#else // PC
	if ( PIXError() )
		return;

	wchar_t wszName[128];
	mbstowcs( wszName, szName, 128 );

	// Fire the PIX event, trapping for errors...
	if ( D3DPERF_BeginEvent( color, wszName ) < 0 )
	{
		Warning( "PIX error Beginning %s event\n", szName );
		IncrementPIXError();
	}
#endif
#endif // #if defined( PIX_INSTRUMENTATION )
}

void CShaderAPIDx8::EndPIXEvent( void )
{
#if ( defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD ) )
	//LOCK_SHADERAPI();

#if defined ( DX_TO_GL_ABSTRACTION )
	GLMEndPIXEvent();

#if defined( _WIN32 )
	// AMD PerfStudio integration: Call into D3D9.DLL's D3DPERF_EndEvent() (this gets intercepted by PerfStudio even in GL mode).
	if ( g_pShaderDeviceMgrDx8->m_pEndEvent )
	{
		g_pShaderDeviceMgrDx8->m_pEndEvent();
	}
#endif
#elif defined( _X360 )
#ifndef _DEBUG
	PIXEndNamedEvent();
#endif
#else // PC
	if ( PIXError() )
		return;

#if !defined( NVPERFHUD )
	// Fire the PIX event, trapping for errors...
	if ( D3DPERF_EndEvent() < 0 )
	{
		Warning("PIX error ending event\n");
		IncrementPIXError();
	}
#endif
#endif
#endif // #if defined( PIX_INSTRUMENTATION )
}

#endif
	
void CShaderAPIDx8::AdvancePIXFrame()
{
#if defined( PIX_INSTRUMENTATION )
	// Ping PIX when this bool goes from false to true
	if ( r_pix_start.GetBool() && (!m_bPixCapturing) )
	{
		StartPIXInstrumentation();
		m_bPixCapturing = true;
	}

	// If we want to record frames...
	if ( r_pix_recordframes.GetInt() )
	{
		if ( m_nPixFrame == 0 )									// First frame to record
		{
			StartPIXInstrumentation();
			m_nPixFrame++;
		}
		else if( m_nPixFrame == r_pix_recordframes.GetInt() )	// Last frame to record
		{
			EndPIXInstrumentation();
			r_pix_recordframes.SetValue(0);
			m_nPixFrame = 0;
		}
		else
		{
			m_nPixFrame++;										// Recording frames...
		}
	}
#endif
}

// No begin-end for this...use this to put discrete markers in the PIX stream
void CShaderAPIDx8::SetPIXMarker( unsigned long color, const char* szName )
{
#if defined( PIX_INSTRUMENTATION )
	LOCK_SHADERAPI();

#if defined( DX_TO_GL_ABSTRACTION )
	if ( g_pShaderDeviceMgrDx8->m_pSetMarker )
	{
		wchar_t wszName[128];
		mbstowcs(wszName, szName, 128 );
		g_pShaderDeviceMgrDx8->m_pSetMarker( 0x2F2F2F2F, wszName );
	}
#elif defined( _X360 )
#ifndef _DEBUG
	char szPIXMarkerName[32];
	PIXifyName( szPIXMarkerName, szName );
	PIXSetMarker( color, szPIXMarkerName );
#endif
#else // PC
	if ( PIXError() )
		return;
	wchar_t wszName[128];
	mbstowcs(wszName, szName, 128 );
	D3DPERF_SetMarker( color, wszName );
#endif

#endif  // PIX_INSTRUMENTATION
}

void CShaderAPIDx8::StartPIXInstrumentation()
{
#if defined( PIX_INSTRUMENTATION )
	SetPIXMarker( PIX_VALVE_ORANGE, "Valve_PIX_Capture_Start" );
#endif
}

void CShaderAPIDx8::EndPIXInstrumentation()
{
#if defined( PIX_INSTRUMENTATION )
	SetPIXMarker( PIX_VALVE_ORANGE, "Valve_PIX_Capture_End" );
#endif
}

void CShaderAPIDx8::IncrementPIXError()
{
#if defined( PIX_INSTRUMENTATION ) && !defined( NVPERFHUD )
	m_nPIXErrorCount++;
	if ( m_nPIXErrorCount >= MAX_PIX_ERRORS )
	{
		Warning( "Source engine built with PIX instrumentation, but PIX doesn't seem to have been used to instantiate the game, which is necessary on PC.\n" );
	}
#endif
}

// Have we already hit several PIX errors?
bool CShaderAPIDx8::PIXError()
{
#if defined( PIX_INSTRUMENTATION ) && !defined( NVPERFHUD )
	return m_nPIXErrorCount >= MAX_PIX_ERRORS;
#else
	return false;
#endif
}


//-----------------------------------------------------------------------------
// Check for device lost
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ChangeVideoMode( const ShaderDeviceInfo_t &info )
{
	if ( IsX360() )
		return;

	LOCK_SHADERAPI();

	m_PendingVideoModeChangeConfig = info;
	m_bPendingVideoModeChange = true;

	if ( info.m_DisplayMode.m_nWidth != 0 && info.m_DisplayMode.m_nHeight != 0 )
	{
		m_nWindowWidth = info.m_DisplayMode.m_nWidth;
		m_nWindowHeight = info.m_DisplayMode.m_nHeight;
	}
}


//-----------------------------------------------------------------------------
// Compute fill rate
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ComputeFillRate()
{
	if ( IsX360() )
	{
		// not valid
		return;
	}

	static unsigned char* pBuf = 0;

	int width, height;
	GetWindowSize( width, height );
	// Snapshot; look at total # pixels drawn...
	if ( !pBuf )
	{
		int memSize = ShaderUtil()->GetMemRequired(
									width, 
									height, 
									1, 
									IMAGE_FORMAT_RGB888, 
									false ) + 4;

		pBuf = (unsigned char*)malloc( memSize );
	}

	ReadPixels( 
		0, 
		0, 
		width, 
		height, 
		pBuf, 
		IMAGE_FORMAT_RGB888 );

	int mask = 0xFF;
	int count = 0;
	unsigned char* pRead = pBuf;
	for (int i = 0; i < height; ++i)
	{
		for (int j = 0; j < width; ++j)
		{
			int val = *(int*)pRead;
			count += (val & mask);
			pRead += 3;
		}
	}
}

//-----------------------------------------------------------------------------
// Use this to get the mesh builder that allows us to modify vertex data
//-----------------------------------------------------------------------------
CMeshBuilder* CShaderAPIDx8::GetVertexModifyBuilder()
{
	return &m_ModifyBuilder;
}

bool CShaderAPIDx8::InFlashlightMode() const
{
	return ShaderUtil()->InFlashlightMode();
}

bool CShaderAPIDx8::InEditorMode() const
{
	return ShaderUtil()->InEditorMode();
}

//-----------------------------------------------------------------------------
// Gets the bound morph's vertex format; returns 0 if no morph is bound
//-----------------------------------------------------------------------------
MorphFormat_t CShaderAPIDx8::GetBoundMorphFormat()
{
	return ShaderUtil()->GetBoundMorphFormat();
}

//-----------------------------------------------------------------------------
// returns the current time in seconds...
//-----------------------------------------------------------------------------
double CShaderAPIDx8::CurrentTime() const
{
	// FIXME: Return game time instead of real time!
	// Or eliminate this altogether and put it into a material var
	// (this is used by vertex modifiers in shader code at the moment)
	return Plat_FloatTime();
}


//-----------------------------------------------------------------------------
// Methods called by the transition table that use dynamic state...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ApplyZBias( const ShadowState_t& shaderState )
{
	MaterialSystem_Config_t &config = ShaderUtil()->GetConfig();
    float a = (config.m_SlopeScaleDepthBias_Decal != 0.0f) ? 1.0f / config.m_SlopeScaleDepthBias_Decal : 0.0f;
    float b = (config.m_SlopeScaleDepthBias_Normal != 0.0f) ? 1.0f / config.m_SlopeScaleDepthBias_Normal : 0.0f;
    float c = (config.m_DepthBias_Decal != 0.0f) ? 1.0f / config.m_DepthBias_Decal : 0.0f;
    float d = (config.m_DepthBias_Normal != 0.0f) ? 1.0f / config.m_DepthBias_Normal : 0.0f;

	// FIXME: No longer necessary; may be necessary if you want to use cat 4.3 drivers?
	// GR - hack for R200
	bool bPS14Only = g_pHardwareConfig->Caps().m_SupportsPixelShaders_1_4 && !g_pHardwareConfig->Caps().m_SupportsPixelShaders_2_0;
	if( ( g_pHardwareConfig->Caps().m_VendorID == 0x1002 ) && bPS14Only )
	{
		// Slam to m_SlopeScaleDepthBias_Decal = 0, m_DepthBias_Decal = -4096
		// which empirically is what appears to look good on r200
		// NOTE: Slamming to 0 instead of -1.0 / 4096 because on Cat 4.9, WinXP, 8500,
		// this causes the z values to be more than 50 units away from the original z values

		a = 0.0f;
		c = -1.0/4096.0;
	}

	// bias = (s * D3DRS_SLOPESCALEDEPTHBIAS) + D3DRS_DEPTHBIAS, where s is the maximum depth slope of the triangle being rendered
    if ( g_pHardwareConfig->Caps().m_ZBiasAndSlopeScaledDepthBiasSupported )
    {
		float fSlopeScaleDepthBias, fDepthBias;
		if ( shaderState.m_ZBias == SHADER_POLYOFFSET_DECAL )
		{
			fSlopeScaleDepthBias = a;
			fDepthBias = c;
		}
		else if ( shaderState.m_ZBias == SHADER_POLYOFFSET_SHADOW_BIAS )
		{
			fSlopeScaleDepthBias = m_fShadowSlopeScaleDepthBias;
			fDepthBias = m_fShadowDepthBias;
		}
		else // assume SHADER_POLYOFFSET_DISABLE
		{
			fSlopeScaleDepthBias = b;
			fDepthBias = d;
		}

		if( ReverseDepthOnX360() )
		{
			fSlopeScaleDepthBias = -fSlopeScaleDepthBias;
			fDepthBias = -fDepthBias;
		}

		SetRenderStateConstMacro( this, D3DRS_SLOPESCALEDEPTHBIAS, *((DWORD*) (&fSlopeScaleDepthBias)) );
		SetRenderStateConstMacro( this, D3DRS_DEPTHBIAS, *((DWORD*) (&fDepthBias)) );
	} 
	else
	{
		MarkAllUserClipPlanesDirty();
		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] |= 
			STATE_CHANGED_VERTEX_SHADER | STATE_CHANGED_FIXED_FUNCTION;
	}
}

void CShaderAPIDx8::ApplyTextureEnable( const ShadowState_t& state, int nSampler )
{
	if ( state.m_SamplerState[nSampler].m_TextureEnable == SamplerState(nSampler).m_TextureEnable )
		return;

	if ( state.m_SamplerState[nSampler].m_TextureEnable )
	{
		SamplerState( nSampler ).m_TextureEnable = true;

		// Should not be necessary/possible (SetTextureState() calls D3D9/DXAbstract, so the calling thread must already own the device.
		//LOCK_SHADERAPI();

		// Don't do this here!!  It ends up giving us extra texture sets.
		// We'll Assert in debug mode if you enable a texture stage
		// but don't bind a texture.
		// see CShaderAPIDx8::RenderPass() for this check.
		// NOTE: We aren't doing this optimization quite yet.  There are situations
		// where you want a texture stage enabled for its texture coordinates, but
		// you don't actually bind a texture (texmvspec for example.)
		SetTextureState( (Sampler_t)nSampler, SamplerState(nSampler).m_BoundTexture, true );
	}
	else
	{
		SamplerState( nSampler ).m_TextureEnable = false;
		SetTextureState( (Sampler_t)nSampler, INVALID_SHADERAPI_TEXTURE_HANDLE );
	}
}


//-----------------------------------------------------------------------------
// Used to clear the transition table when we know it's become invalid.
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ClearSnapshots()
{
	LOCK_SHADERAPI();
	FlushBufferedPrimitives();
	m_TransitionTable.Reset();
	InitRenderState();
}


static void KillTranslation( D3DXMATRIX& mat )
{
	mat[3] = 0.0f;
	mat[7] = 0.0f;
	mat[11] = 0.0f;
	mat[12] = 0.0f;
	mat[13] = 0.0f;
	mat[14] = 0.0f;
	mat[15] = 1.0f;
}

static void PrintMatrix( const char *name, const D3DXMATRIX& mat )
{
	int row, col;
	char buf[128];

	Plat_DebugString( name );
	Plat_DebugString( "\n" );
	for( row = 0; row < 4; row++ )
	{
		Plat_DebugString( "    " );
		for( col = 0; col < 4; col++ )
		{
			sprintf( buf, "%f ", ( float )mat( row, col ) );
			Plat_DebugString( buf );
		}
		Plat_DebugString( "\n" );
	}
	Plat_DebugString( "\n" );
}


//-----------------------------------------------------------------------------
// Gets the vertex format for a particular snapshot id
//-----------------------------------------------------------------------------
VertexFormat_t CShaderAPIDx8::ComputeVertexUsage( int num, StateSnapshot_t* pIds ) const
{
	LOCK_SHADERAPI();
	if (num == 0)
		return 0;

	// We don't have to all sorts of crazy stuff if there's only one snapshot
	if ( num == 1 )
	{
		const ShadowShaderState_t& state = m_TransitionTable.GetSnapshotShader( pIds[0] );
		return state.m_VertexUsage;
	}

	Assert( pIds );

	// Aggregating vertex formats is a little tricky;
	// For example, what do we do when two passes want user data? 
	// Can we assume they are the same? For now, I'm going to
	// just print a warning in debug.

	VertexCompressionType_t compression = VERTEX_COMPRESSION_INVALID;
	int userDataSize = 0;
	int numBones = 0;
	int texCoordSize[VERTEX_MAX_TEXTURE_COORDINATES] = { 0, 0, 0, 0, 0, 0, 0, 0 };
	int flags = 0;

	for (int i = num; --i >= 0; )
	{
		const ShadowShaderState_t& state = m_TransitionTable.GetSnapshotShader( pIds[i] );
		VertexFormat_t fmt = state.m_VertexUsage;
		flags |= VertexFlags(fmt);

		VertexCompressionType_t newCompression = CompressionType( fmt );
		if ( ( compression != newCompression ) && ( compression != VERTEX_COMPRESSION_INVALID ) )
		{
			Warning("Encountered a material with two passes that specify different vertex compression types!\n");
			compression = VERTEX_COMPRESSION_NONE; // Be safe, disable compression
		}

		int newNumBones = NumBoneWeights(fmt);
		if ((numBones != newNumBones) && (newNumBones != 0))
		{
			if (numBones != 0)
			{
				Warning("Encountered a material with two passes that use different numbers of bones!\n");
			}
			numBones = newNumBones;
		}

		int newUserSize = UserDataSize(fmt);
		if ((userDataSize != newUserSize) && (newUserSize != 0))
		{
			if (userDataSize != 0)
			{
				Warning("Encountered a material with two passes that use different user data sizes!\n");
			}
			userDataSize = newUserSize;
		}

		for ( int j = 0; j < VERTEX_MAX_TEXTURE_COORDINATES; ++j )
		{
			int newSize = TexCoordSize( (TextureStage_t)j, fmt );
			if ( ( texCoordSize[j] != newSize ) && ( newSize != 0 ) )
			{
				if ( texCoordSize[j] != 0 ) 
				{
					Warning("Encountered a material with two passes that use different texture coord sizes!\n");
				}
				if ( texCoordSize[j] < newSize )
				{
					texCoordSize[j] = newSize;
				}
			}
		}
	}

	return MeshMgr()->ComputeVertexFormat( flags, VERTEX_MAX_TEXTURE_COORDINATES, 
		texCoordSize, numBones, userDataSize );
}

VertexFormat_t CShaderAPIDx8::ComputeVertexFormat( int num, StateSnapshot_t* pIds ) const
{
	LOCK_SHADERAPI();
	VertexFormat_t fmt = ComputeVertexUsage( num, pIds );
	return fmt;
}


//-----------------------------------------------------------------------------
// What fields in the morph do we actually use?
//-----------------------------------------------------------------------------
MorphFormat_t CShaderAPIDx8::ComputeMorphFormat( int numSnapshots, StateSnapshot_t* pIds ) const
{
	LOCK_SHADERAPI();
	MorphFormat_t format = 0;
	for ( int i = 0; i < numSnapshots; ++i )
	{
		MorphFormat_t fmt = m_TransitionTable.GetSnapshotShader( pIds[i] ).m_MorphUsage;
		format |= VertexFlags(fmt);
	}
	return format;
}

//-----------------------------------------------------------------------------
// Commits a range of vertex shader constants
//-----------------------------------------------------------------------------
static void CommitVertexShaderConstantRange( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState,
	DynamicState_t &currentState, bool bForce, int nFirstConstant, int nCount )
{
	if ( IsX360() )
	{
		// invalid code path for 360, not coded for 360 GPU contant awareness
		Assert( 0 );
		return;
	}

	int nFirstCommit = nFirstConstant;
	int nCommitCount = 0;

	for ( int i = 0; i < nCount; ++i )
	{
		int nVar = nFirstConstant + i; 

		bool bDifferentValue = bForce || ( desiredState.m_pVectorVertexShaderConstant[nVar] != currentState.m_pVectorVertexShaderConstant[nVar] );
		if ( !bDifferentValue )
		{
			if ( nCommitCount != 0 )
			{
				// flush the prior range
				pDevice->SetVertexShaderConstantF( nFirstCommit, desiredState.m_pVectorVertexShaderConstant[nFirstCommit].Base(), nCommitCount );

				memcpy( &currentState.m_pVectorVertexShaderConstant[nFirstCommit], 
					&desiredState.m_pVectorVertexShaderConstant[nFirstCommit], nCommitCount * 4 * sizeof(float) );
			}

			// start of new range
			nFirstCommit = nVar + 1;
			nCommitCount = 0;
		}
		else
		{
			++nCommitCount;
		}
	}

	if ( nCommitCount != 0 )
	{
		// flush range
		pDevice->SetVertexShaderConstantF( nFirstCommit, desiredState.m_pVectorVertexShaderConstant[nFirstCommit].Base(), nCommitCount );

		memcpy( &currentState.m_pVectorVertexShaderConstant[nFirstCommit], 
			&desiredState.m_pVectorVertexShaderConstant[nFirstCommit], nCommitCount * 4 * sizeof(float) );
	}
}


//-----------------------------------------------------------------------------
// Gets the current buffered state... (debug only)
//-----------------------------------------------------------------------------
void CShaderAPIDx8::GetBufferedState( BufferedState_t& state )
{
	memcpy( &state.m_Transform[0], &GetTransform(MATERIAL_MODEL), sizeof(D3DXMATRIX) ); 
	memcpy( &state.m_Transform[1], &GetTransform(MATERIAL_VIEW), sizeof(D3DXMATRIX) ); 
	memcpy( &state.m_Transform[2], &GetTransform(MATERIAL_PROJECTION), sizeof(D3DXMATRIX) ); 
	memcpy( &state.m_Viewport, &m_DynamicState.m_Viewport, sizeof(state.m_Viewport) );
	state.m_PixelShader = ShaderManager()->GetCurrentPixelShader();
	state.m_VertexShader = ShaderManager()->GetCurrentVertexShader();
	for (int i = 0; i < g_pHardwareConfig->GetSamplerCount(); ++i)
	{
		state.m_BoundTexture[i] = m_DynamicState.m_SamplerState[i].m_BoundTexture;
	}
}


//-----------------------------------------------------------------------------
// constant color methods
//-----------------------------------------------------------------------------

void CShaderAPIDx8::Color3f( float r, float g, float b )
{
	unsigned int color = D3DCOLOR_ARGB( 255, (int)(r * 255), 
		(int)(g * 255), (int)(b * 255) );
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color4f( float r, float g, float b, float a )
{
	unsigned int color = D3DCOLOR_ARGB( (int)(a * 255), (int)(r * 255), 
		(int)(g * 255), (int)(b * 255) );
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color3fv( float const *c )
{
	Assert( c );
	unsigned int color = D3DCOLOR_ARGB( 255, (int)(c[0] * 255), 
		(int)(c[1] * 255), (int)(c[2] * 255) );
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color4fv( float const *c )
{
	Assert( c );
	unsigned int color = D3DCOLOR_ARGB( (int)(c[3] * 255), (int)(c[0] * 255), 
		(int)(c[1] * 255), (int)(c[2] * 255) );
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color3ub( unsigned char r, unsigned char g, unsigned char b )
{
	unsigned int color = D3DCOLOR_ARGB( 255, r, g, b ); 
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color3ubv( unsigned char const* pColor )
{
	Assert( pColor );
	unsigned int color = D3DCOLOR_ARGB( 255, pColor[0], pColor[1], pColor[2] ); 
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color4ub( unsigned char r, unsigned char g, unsigned char b, unsigned char a )
{
	unsigned int color = D3DCOLOR_ARGB( a, r, g, b );
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}

void CShaderAPIDx8::Color4ubv( unsigned char const* pColor )
{
	Assert( pColor );
	unsigned int color = D3DCOLOR_ARGB( pColor[3], pColor[0], pColor[1], pColor[2] ); 
	if (color != m_DynamicState.m_ConstantColor)
	{
		m_DynamicState.m_ConstantColor = color;
		SetSupportedRenderState( D3DRS_TEXTUREFACTOR, color );
	}
}


//-----------------------------------------------------------------------------
// The shade mode
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ShadeMode( ShaderShadeMode_t mode )
{
	LOCK_SHADERAPI();
	D3DSHADEMODE shadeMode = (mode == SHADER_FLAT) ? D3DSHADE_FLAT : D3DSHADE_GOURAUD;
	if (m_DynamicState.m_ShadeMode != shadeMode)
	{
		m_DynamicState.m_ShadeMode = shadeMode;
		SetRenderStateConstMacro( this, D3DRS_SHADEMODE, shadeMode );
	}
}


//-----------------------------------------------------------------------------
// Buffering 2 frames ahead
//-----------------------------------------------------------------------------
void CShaderAPIDx8::EnableBuffer2FramesAhead( bool bEnable )
{
#ifdef _X360
	m_bBuffer2FramesAhead = bEnable;
	if ( bEnable != m_DynamicState.m_bBuffer2Frames )
	{
		SetRenderState( D3DRS_BUFFER2FRAMES, bEnable );
		m_DynamicState.m_bBuffer2Frames = bEnable;
	}
#endif
}

void CShaderAPIDx8::SetDepthFeatheringPixelShaderConstant( int iConstant, float fDepthBlendScale )
{
	float fConstantValues[4];

	if( IsX360() )
	{
		const D3DMATRIX &projMatrix = GetProjectionMatrix();

		fConstantValues[0] = 50.0f / fDepthBlendScale;
		fConstantValues[1] = 1.0f / projMatrix.m[2][2];
		fConstantValues[2] = 1.0f / projMatrix.m[3][2];
		fConstantValues[3] = projMatrix.m[2][2];

		/*
		D3DXMATRIX invProjMatrix;
		D3DXMatrixInverse( &invProjMatrix, NULL, (D3DXMATRIX *)&projMatrix );
		fConstantValues[1] = invProjMatrix.m[3][2];
		fConstantValues[2] = invProjMatrix.m[3][3];
		fConstantValues[3] = invProjMatrix.m[2][2];
		*/
	}
	else
	{
		fConstantValues[0] = m_DynamicState.m_DestAlphaDepthRange / fDepthBlendScale;
		fConstantValues[1] = fConstantValues[2] = fConstantValues[3] = 0.0f; //empty
	}

	SetPixelShaderConstant( iConstant, fConstantValues );
}


//-----------------------------------------------------------------------------
// Cull mode..
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetCullModeState( bool bEnable, D3DCULL nDesiredCullMode )
{ 
	D3DCULL nCullMode = bEnable ? nDesiredCullMode : D3DCULL_NONE;
	if ( nCullMode != m_DynamicState.m_CullMode )
	{
		SetRenderStateConstMacro( this, D3DRS_CULLMODE, nCullMode );
		m_DynamicState.m_CullMode = nCullMode;
	}
}

void CShaderAPIDx8::ApplyCullEnable( bool bEnable )
{
	m_DynamicState.m_bCullEnabled = bEnable;
	SetCullModeState( m_DynamicState.m_bCullEnabled, m_DynamicState.m_DesiredCullMode );
}

void CShaderAPIDx8::CullMode( MaterialCullMode_t nCullMode )
{
	LOCK_SHADERAPI();
	D3DCULL nNewCullMode;
	switch( nCullMode )
	{
	case MATERIAL_CULLMODE_CCW:
		// Culls backfacing polys (normal)
		nNewCullMode = D3DCULL_CCW;
		break;

	case MATERIAL_CULLMODE_CW:
		// Culls frontfacing polys
		nNewCullMode = D3DCULL_CW;
		break;

	default:
		Warning( "CullMode: invalid cullMode\n" );
		return;
	}

	if (m_DynamicState.m_DesiredCullMode != nNewCullMode)
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_DesiredCullMode = nNewCullMode;
		SetCullModeState( m_DynamicState.m_bCullEnabled, m_DynamicState.m_DesiredCullMode );
	}
}

static ConVar mat_alphacoverage( "mat_alphacoverage", "1" );
void CShaderAPIDx8::ApplyAlphaToCoverage( bool bEnable )
{
	if ( mat_alphacoverage.GetBool() )
	{
		if ( bEnable )
			EnableAlphaToCoverage();
		else
			DisableAlphaToCoverage();
	}
	else
	{
		DisableAlphaToCoverage();
	}
}

//-----------------------------------------------------------------------------
// Returns the current cull mode of the current material (for selection mode only)
//-----------------------------------------------------------------------------
D3DCULL CShaderAPIDx8::GetCullMode() const
{
	Assert( m_pMaterial );
	if ( m_pMaterial->GetMaterialVarFlag( MATERIAL_VAR_NOCULL ) )
		return D3DCULL_NONE;
	return m_DynamicState.m_DesiredCullMode;
}

void CShaderAPIDx8::SetRasterState( const ShaderRasterState_t& state )
{
	// FIXME: Implement!
}


void CShaderAPIDx8::ForceDepthFuncEquals( bool bEnable )
{
	LOCK_SHADERAPI();
	if ( !g_pShaderDeviceDx8->IsDeactivated() )
	{
		m_TransitionTable.ForceDepthFuncEquals( bEnable );
	}
}

void CShaderAPIDx8::OverrideDepthEnable( bool bEnable, bool bDepthEnable )
{
	LOCK_SHADERAPI();
	if ( !g_pShaderDeviceDx8->IsDeactivated() )
	{
		m_TransitionTable.OverrideDepthEnable( bEnable, bDepthEnable );
	}
}

void CShaderAPIDx8::OverrideAlphaWriteEnable( bool bOverrideEnable, bool bAlphaWriteEnable )
{
	LOCK_SHADERAPI();
	if ( !g_pShaderDeviceDx8->IsDeactivated() )
	{
		m_TransitionTable.OverrideAlphaWriteEnable( bOverrideEnable, bAlphaWriteEnable );
	}
}

void CShaderAPIDx8::OverrideColorWriteEnable( bool bOverrideEnable, bool bColorWriteEnable )
{
	LOCK_SHADERAPI();
	if ( !g_pShaderDeviceDx8->IsDeactivated() )
	{
		m_TransitionTable.OverrideColorWriteEnable( bOverrideEnable, bColorWriteEnable );
	}
}

void CShaderAPIDx8::UpdateFastClipUserClipPlane( void )
{
	float plane[4];
	switch( m_DynamicState.m_HeightClipMode )
	{
	case MATERIAL_HEIGHTCLIPMODE_DISABLE:
		EnableFastClip( false );
		break;
	case MATERIAL_HEIGHTCLIPMODE_RENDER_ABOVE_HEIGHT:
		plane[0] = 0.0f;
		plane[1] = 0.0f;
		plane[2] = 1.0f;
		plane[3] = m_DynamicState.m_HeightClipZ;
		EnableFastClip( true );
		SetFastClipPlane(plane);
		break;
	case MATERIAL_HEIGHTCLIPMODE_RENDER_BELOW_HEIGHT:
		plane[0] = 0.0f;
		plane[1] = 0.0f;
		plane[2] = -1.0f;
		plane[3] = -m_DynamicState.m_HeightClipZ;
		EnableFastClip( true );
		SetFastClipPlane(plane);
		break;
	}
}

void CShaderAPIDx8::SetHeightClipZ( float z )
{
	LOCK_SHADERAPI();
	if( z != m_DynamicState.m_HeightClipZ )
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_HeightClipZ = z;
		UpdateVertexShaderFogParams();
		UpdateFastClipUserClipPlane();
		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] |= 
			STATE_CHANGED_VERTEX_SHADER | STATE_CHANGED_FIXED_FUNCTION;
	}
}

void CShaderAPIDx8::SetHeightClipMode( MaterialHeightClipMode_t heightClipMode )
{
	LOCK_SHADERAPI();
	if( heightClipMode != m_DynamicState.m_HeightClipMode )
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_HeightClipMode = heightClipMode;
		UpdateVertexShaderFogParams();
		UpdateFastClipUserClipPlane();
		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] |= 
			STATE_CHANGED_VERTEX_SHADER | STATE_CHANGED_FIXED_FUNCTION;
	}
}

void CShaderAPIDx8::SetClipPlane( int index, const float *pPlane )
{
	LOCK_SHADERAPI();
	Assert( index < g_pHardwareConfig->MaxUserClipPlanes() && index >= 0 );

	// NOTE: The plane here is specified in *world space*
	// NOTE: This is done because they assume Ax+By+Cz+Dw = 0 (where w = 1 in real space)
	// while we use Ax+By+Cz=D
	D3DXPLANE plane;
	plane.a = pPlane[0];
	plane.b = pPlane[1];
	plane.c = pPlane[2];
	plane.d = -pPlane[3];

	if ( plane != m_DynamicState.m_UserClipPlaneWorld[index] )
	{
		FlushBufferedPrimitives();

		m_DynamicState.m_UserClipPlaneChanged |= ( 1 << index );
		m_DynamicState.m_UserClipPlaneWorld[index] = plane;
	}
}


//-----------------------------------------------------------------------------
// Converts a D3DXMatrix to a VMatrix and back
//-----------------------------------------------------------------------------
void CShaderAPIDx8::D3DXMatrixToVMatrix( const D3DXMATRIX &in, VMatrix &out )
{
	MatrixTranspose( *(const VMatrix*)&in, out );
}

void CShaderAPIDx8::VMatrixToD3DXMatrix( const VMatrix &in, D3DXMATRIX &out )
{
	MatrixTranspose( in, *(VMatrix*)&out );
}

	
//-----------------------------------------------------------------------------
// Mark all user clip planes as being dirty
//-----------------------------------------------------------------------------
void CShaderAPIDx8::MarkAllUserClipPlanesDirty()
{
	m_DynamicState.m_UserClipPlaneChanged |= ( 1 << g_pHardwareConfig->MaxUserClipPlanes() ) - 1;
	m_DynamicState.m_bFastClipPlaneChanged = true;
}


//-----------------------------------------------------------------------------
// User clip plane override
//-----------------------------------------------------------------------------
void CShaderAPIDx8::EnableUserClipTransformOverride( bool bEnable )
{
	LOCK_SHADERAPI();
	if ( m_DynamicState.m_bUserClipTransformOverride != bEnable )
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_bUserClipTransformOverride = bEnable;
		MarkAllUserClipPlanesDirty();
	}
}


//-----------------------------------------------------------------------------
// Specify user clip transform
//-----------------------------------------------------------------------------
void CShaderAPIDx8::UserClipTransform( const VMatrix &worldToProjection )
{
	LOCK_SHADERAPI();
	D3DXMATRIX dxWorldToProjection;
	VMatrixToD3DXMatrix( worldToProjection, dxWorldToProjection );

	if ( m_DynamicState.m_UserClipTransform != dxWorldToProjection )
	{
		m_DynamicState.m_UserClipTransform = dxWorldToProjection;
		if ( m_DynamicState.m_bUserClipTransformOverride )
		{
			FlushBufferedPrimitives();
			MarkAllUserClipPlanesDirty();
		}
	}
}


//-----------------------------------------------------------------------------
// Enables a user clip plane
//-----------------------------------------------------------------------------
void CShaderAPIDx8::EnableClipPlane( int index, bool bEnable )
{
	LOCK_SHADERAPI();
	Assert( index < g_pHardwareConfig->MaxUserClipPlanes() && index >= 0 );
	if( ( m_DynamicState.m_UserClipPlaneEnabled & ( 1 << index ) ? true : false ) != bEnable )
	{
		FlushBufferedPrimitives();
		if( bEnable )
		{
			m_DynamicState.m_UserClipPlaneEnabled |= ( 1 << index );
		}
		else
		{
			m_DynamicState.m_UserClipPlaneEnabled &= ~( 1 << index );
		}
		SetRenderStateConstMacro( this, D3DRS_CLIPPLANEENABLE, m_DynamicState.m_UserClipPlaneEnabled );
	}
}

//-----------------------------------------------------------------------------
// Recomputes the fast-clip plane matrices based on the current fast-clip plane
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitFastClipPlane( )
{
	// Don't bother recomputing if unchanged or disabled
	if ( !m_DynamicState.m_bFastClipPlaneChanged || !m_DynamicState.m_FastClipEnabled )
		return;

	m_DynamicState.m_bFastClipPlaneChanged = false;

	D3DXMatrixIdentity( &m_CachedFastClipProjectionMatrix );

	// Compute worldToProjection - need inv. transpose for transforming plane.
	D3DXMATRIX viewToProjInvTrans, viewToProjInv, viewToProj = GetTransform(MATERIAL_PROJECTION);
	viewToProj._43 *= 0.5f;		// pull in zNear because the shear in effect 
								// moves it out: clipping artifacts when looking down at water 
								// could occur if this multiply is not done

	D3DXMATRIX worldToViewInvTrans, worldToViewInv, worldToView = GetUserClipTransform();

	D3DXMatrixInverse( &worldToViewInv, NULL, &worldToView );
	D3DXMatrixTranspose( &worldToViewInvTrans, &worldToViewInv ); 	

	D3DXMatrixInverse( &viewToProjInv, NULL, &viewToProj );
	D3DXMatrixTranspose( &viewToProjInvTrans, &viewToProjInv ); 

	D3DXPLANE plane;
	D3DXPlaneNormalize( &plane, &m_DynamicState.m_FastClipPlane );
	D3DXVECTOR4 clipPlane( plane.a, plane.b, plane.c, plane.d );

	// transform clip plane into view space
	D3DXVec4Transform( &clipPlane, &clipPlane, &worldToViewInvTrans );

	// transform clip plane into projection space
	D3DXVec4Transform( &clipPlane, &clipPlane, &viewToProjInvTrans );
	
#define ALLOW_FOR_FASTCLIPDUMPS 0

#if (ALLOW_FOR_FASTCLIPDUMPS == 1)
	static ConVar shader_dumpfastclipprojectioncoords( "shader_dumpfastclipprojectioncoords", "0", 0, "dump fast clip projected matrix" );
	if( shader_dumpfastclipprojectioncoords.GetBool() )
		DevMsg( "Fast clip plane projected coordinates: %f %f %f %f", clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w );
#endif
	
	if( (clipPlane.z * clipPlane.w) <= -0.4f ) // a plane with (z*w) > -0.4 at this point is behind the camera and will cause graphical glitches. Toss it. (0.4 found through experimentation)
	{
#if (ALLOW_FOR_FASTCLIPDUMPS == 1)
		if( shader_dumpfastclipprojectioncoords.GetBool() )
			DevMsg( "    %f %f %f %f\n", clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w );
#endif

		D3DXVec4Normalize( &clipPlane, &clipPlane );

		//if ((fabs(clipPlane.z) > 0.01) && (fabs(clipPlane.w) > 0.01f))  
		{
			// put projection space clip plane in Z column
			m_CachedFastClipProjectionMatrix._13 = clipPlane.x;
			m_CachedFastClipProjectionMatrix._23 = clipPlane.y;
			m_CachedFastClipProjectionMatrix._33 = clipPlane.z;
			m_CachedFastClipProjectionMatrix._43 = clipPlane.w;
		}
	}
#if (ALLOW_FOR_FASTCLIPDUMPS == 1)
	else
	{
		if( shader_dumpfastclipprojectioncoords.GetBool() )
			DevMsg( "\n" ); //finish off the line above
	}
#endif

	m_CachedFastClipProjectionMatrix = viewToProj * m_CachedFastClipProjectionMatrix;

	// Update the cached polyoffset matrix (with clip) too:
	ComputePolyOffsetMatrix( m_CachedFastClipProjectionMatrix, m_CachedFastClipPolyOffsetProjectionMatrix );
}

//-----------------------------------------------------------------------------
// Sets the fast-clip plane; but doesn't update the matrices
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetFastClipPlane( const float *pPlane )
{	
	LOCK_SHADERAPI();
	D3DXPLANE plane;
	plane.a = pPlane[0];
	plane.b = pPlane[1];
	plane.c = pPlane[2];
	plane.d = -pPlane[3];
	if ( plane != m_DynamicState.m_FastClipPlane )
	{
		FlushBufferedPrimitives();		
		UpdateVertexShaderFogParams();

		m_DynamicState.m_FastClipPlane = plane;

		// Mark a dirty bit so when it comes time to commit view + projection transforms,
		// we also update the fast clip matrices
		m_DynamicState.m_bFastClipPlaneChanged = true;

		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] |= 
			STATE_CHANGED_VERTEX_SHADER | STATE_CHANGED_FIXED_FUNCTION;
	}
}


//-----------------------------------------------------------------------------
// Enables/disables fast-clip mode
//-----------------------------------------------------------------------------
void CShaderAPIDx8::EnableFastClip( bool bEnable )
{
	LOCK_SHADERAPI();
	if( m_DynamicState.m_FastClipEnabled != bEnable )
	{
		FlushBufferedPrimitives();
		UpdateVertexShaderFogParams();

		m_DynamicState.m_FastClipEnabled = bEnable;

		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] |= 
			STATE_CHANGED_VERTEX_SHADER | STATE_CHANGED_FIXED_FUNCTION;
	}
}

/*
// -----------------------------------------------------------------------------
// SetInvariantClipVolume - This routine takes six planes as input and sets the
// appropriate Direct3D user clip plane state
// What we mean by "invariant clipping" here is that certain devices implement
// user clip planes at the raster level, which means that multi-pass rendering
// where one pass is unclipped (such as base geometry) and another pass *IS*
// clipped (such as flashlight geometry), there is no z-fighting since the
// clipping is implemented at the raster level in an "invariant" way
// -----------------------------------------------------------------------------
void CShaderAPIDx8::SetInvariantClipVolume( Frustum_t *pFrustumPlanes )
{
	// Only do this on modern nVidia hardware, which does invariant clipping
	if ( m_VendorID == VENDORID_NVIDIA )
	{
		if ( pFrustumPlanes )
		{
//			if ()
//			{
//
//			}

			for (int i=0; i<6; i++)
			{
				const cplane_t *pPlane = pFrustumPlanes->GetPlane(i);

				SetClipPlane( i, (float *) &pPlane->normal );
				EnableClipPlane( i, true );

//	FRUSTUM_RIGHT		= 0,
//	FRUSTUM_LEFT		= 1,
//	FRUSTUM_TOP			= 2,
//	FRUSTUM_BOTTOM		= 3,
//	FRUSTUM_NEARZ		= 4,
//	FRUSTUM_FARZ		= 5,

			}
		}
		else // NULL disables the invariant clip volume...
		{
			for (int i=0; i<6; i++)
			{
				EnableClipPlane( i, false );
			}
		}
	}
}
*/

//-----------------------------------------------------------------------------
// Vertex blending
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetVertexBlendState( int numBones )
{
	if (numBones < 0)
	{
		numBones = m_DynamicState.m_NumBones;
	}

	// For fixed-function, the number of weights is actually one less than
	// the number of bones
	if (numBones > 0)
		--numBones;

	bool normalizeNormals = true;
	D3DVERTEXBLENDFLAGS vertexBlend;
	switch(numBones)
	{
	case 0:
		vertexBlend = D3DVBF_DISABLE;
		normalizeNormals = false;
		break;

	case 1:
		vertexBlend = D3DVBF_1WEIGHTS;
		break;

	case 2:
		vertexBlend = D3DVBF_2WEIGHTS;
		break;

	case 3:
		vertexBlend = D3DVBF_3WEIGHTS;
		break;

	default:
		vertexBlend = D3DVBF_DISABLE;
		Assert(0);
		break;
	}

	if (m_DynamicState.m_VertexBlend != vertexBlend)
	{
		m_DynamicState.m_VertexBlend  = vertexBlend;
		SetSupportedRenderState( D3DRS_VERTEXBLEND, vertexBlend );
	}

	// Activate normalize normals when skinning is on
	if (m_DynamicState.m_NormalizeNormals != normalizeNormals)
	{
		m_DynamicState.m_NormalizeNormals  = normalizeNormals;
		SetSupportedRenderState( D3DRS_NORMALIZENORMALS, normalizeNormals );
	}
}


//-----------------------------------------------------------------------------
// Vertex blending
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetNumBoneWeights( int numBones )
{
	LOCK_SHADERAPI();
	if (m_DynamicState.m_NumBones != numBones)
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_NumBones = numBones;

		if ( m_TransitionTable.CurrentShadowState() )
		{
			SetVertexBlendState( m_TransitionTable.CurrentShadowState()->m_VertexBlendEnable ? -1 : 0 );
		}
	}
}

void CShaderAPIDx8::EnableHWMorphing( bool bEnable )
{
	LOCK_SHADERAPI();
	if ( m_DynamicState.m_bHWMorphingEnabled != bEnable )
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_bHWMorphingEnabled = bEnable;
	}
}

void CShaderAPIDx8::EnabledSRGBWrite( bool bEnabled )
{
	m_DynamicState.m_bSRGBWritesEnabled = bEnabled;

	if ( g_pHardwareConfig->SupportsPixelShaders_2_b() )
	{
		UpdatePixelFogColorConstant();

		if ( bEnabled && g_pHardwareConfig->NeedsShaderSRGBConversion() )
			BindTexture( SHADER_SAMPLER15, m_hLinearToGammaTableTexture );
		else
			BindTexture( SHADER_SAMPLER15, m_hLinearToGammaTableIdentityTexture );
	}
}

#if defined( _X360 )
void CShaderAPIDx8::ApplySRGBReadState( int iTextureStage, bool bSRGBReadEnabled )
{
	Sampler_t sampler = (Sampler_t)iTextureStage;
	SamplerState_t &samplerState = SamplerState( sampler );
	samplerState.m_SRGBReadEnable = bSRGBReadEnabled;

	if ( ( samplerState.m_BoundTexture == INVALID_SHADERAPI_TEXTURE_HANDLE ) || !samplerState.m_TextureEnable )
	{
		return;
	}

	IDirect3DBaseTexture *pBindTexture = CShaderAPIDx8::GetD3DTexture( samplerState.m_BoundTexture );
	if ( !pBindTexture )
	{
		return;
	}
	
	DWORD linearFormatBackup = pBindTexture->Format.dword[0]; //if we convert to srgb format, we need the original format for reverting. We only need the first DWORD of GPUTEXTURE_FETCH_CONSTANT.
	if ( bSRGBReadEnabled )
	{
		pBindTexture->Format.SignX = pBindTexture->Format.SignY = pBindTexture->Format.SignZ = 3; //convert to srgb format for the bind. This effectively emulates the old srgb read sampler state
	}

	Dx9Device()->SetTexture( sampler, pBindTexture );

	// copy back the format in case we changed it
	pBindTexture->Format.dword[0] = linearFormatBackup;
}
#endif

//-----------------------------------------------------------------------------
// Fog methods...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::UpdatePixelFogColorConstant( void )
{
	Assert( HardwareConfig()->SupportsPixelShaders_2_b() );
	float fogColor[4];

	switch( GetPixelFogMode() )
	{
	case MATERIAL_FOG_NONE:
		{
			for( int i = 0; i != 3; ++i )
				fogColor[i] = 0.0f;
		}
		break;

	case MATERIAL_FOG_LINEAR:
		{
			//setup the fog for mixing linear fog in the pixel shader so that it emulates ff range fog
			for( int i = 0; i != 3; ++i )
				fogColor[i] = m_DynamicState.m_PixelFogColor[i];

			if( m_DynamicState.m_bSRGBWritesEnabled ) 
			{
				//since the fog color will assuredly get converted from linear to gamma, we should probably convert it from gamma to linear
				for( int i = 0; i != 3; ++i )
					fogColor[i] = GammaToLinear_HardwareSpecific( fogColor[i] );
			}

			if( (!m_DynamicState.m_bFogGammaCorrectionDisabled) && (g_pHardwareConfig->GetHDRType() == HDR_TYPE_INTEGER) )
			{
				for( int i = 0; i != 3; ++i )
					fogColor[i] *= m_ToneMappingScale.x; // Linear
			}
		}
		break;

	case MATERIAL_FOG_LINEAR_BELOW_FOG_Z:
		{
			//water fog has been around a while and has never tonemap scaled, and has always been in linear space
			if( g_pHardwareConfig->NeedsShaderSRGBConversion() )
			{
				//srgb in ps2b uses the 2.2 curve
				for( int i = 0; i != 3; ++i )
					fogColor[i] = pow( m_DynamicState.m_PixelFogColor[i], 2.2f );
			}
			else
			{
				for( int i = 0; i != 3; ++i )
					fogColor[i] = GammaToLinear_HardwareSpecific( m_DynamicState.m_PixelFogColor[i] ); //this is how water fog color has always been setup in the past
			}

			if( (!m_DynamicState.m_bFogGammaCorrectionDisabled) && (g_pHardwareConfig->GetHDRType() == HDR_TYPE_INTEGER) )
			{
				for( int i = 0; i != 3; ++i )
					fogColor[i] *= m_ToneMappingScale.x; // Linear
			}
		}
		break;

		NO_DEFAULT;
	};	

	fogColor[3] = 1.0f / m_DynamicState.m_DestAlphaDepthRange;

	SetPixelShaderConstant( LINEAR_FOG_COLOR, fogColor );
}


void CShaderAPIDx8::ApplyFogMode( ShaderFogMode_t fogMode, bool bSRGBWritesEnabled, bool bDisableFogGammaCorrection )
{
	HDRType_t hdrType = g_pHardwareConfig->GetHDRType();

	if ( fogMode == SHADER_FOGMODE_DISABLED )
	{
		if( hdrType != HDR_TYPE_FLOAT )
		{
			FogMode( MATERIAL_FOG_NONE );
		}

		if( m_DelayedShaderConstants.iPixelShaderFogParams != -1 )
			SetPixelShaderFogParams( m_DelayedShaderConstants.iPixelShaderFogParams, fogMode );

		return;
	}
	
	bool bShouldGammaCorrect = true;			// By default, we'll gamma correct.
	unsigned char r = 0, g = 0, b = 0;			// Black fog

	
	if( hdrType != HDR_TYPE_FLOAT )
	{
		FogMode( m_SceneFogMode );
	}

	if( m_DelayedShaderConstants.iPixelShaderFogParams != -1 )
		SetPixelShaderFogParams( m_DelayedShaderConstants.iPixelShaderFogParams, fogMode );

	switch( fogMode )
	{
		case SHADER_FOGMODE_BLACK:				// Additive decals
			bShouldGammaCorrect = false;
			break;
		case SHADER_FOGMODE_OO_OVERBRIGHT:
		case SHADER_FOGMODE_GREY:				// Mod2x decals
			r = g = b = 128;
			break;
		case SHADER_FOGMODE_WHITE:				// Multiplicative decals
			r = g = b = 255;
			bShouldGammaCorrect = false;
			break;
		case SHADER_FOGMODE_FOGCOLOR:
			GetSceneFogColor( &r, &g, &b );		// Scene fog color
			break;
		NO_DEFAULT
	}

	bShouldGammaCorrect &= !bDisableFogGammaCorrection;
	m_DynamicState.m_bFogGammaCorrectionDisabled = !bShouldGammaCorrect;

	D3DCOLOR color;
	if ( bShouldGammaCorrect )
	{
		color = ComputeGammaCorrectedFogColor( r, g, b, bSRGBWritesEnabled );
	}
	else
	{
		color = D3DCOLOR_ARGB( 255, r, g, b );
	}


	const float fColorScale = 1.0f / 255.0f;
	m_DynamicState.m_PixelFogColor[0] = (float)(r) * fColorScale;
	m_DynamicState.m_PixelFogColor[1] = (float)(g) * fColorScale;
	m_DynamicState.m_PixelFogColor[2] = (float)(b) * fColorScale;

	if( g_pMaterialSystemHardwareConfig->SupportsPixelShaders_2_b() )
	{
		UpdatePixelFogColorConstant();
	}

	if ( color != m_DynamicState.m_FogColor )
	{
		if( hdrType != HDR_TYPE_FLOAT )
		{
			m_DynamicState.m_FogColor = color;
			SetRenderStateConstMacro( this, D3DRS_FOGCOLOR, m_DynamicState.m_FogColor );
		}
	}
}

void CShaderAPIDx8::SceneFogMode( MaterialFogMode_t fogMode )
{
	LOCK_SHADERAPI();
	if( m_SceneFogMode != fogMode )
	{
		FlushBufferedPrimitives();
		m_SceneFogMode = fogMode;

		if ( m_TransitionTable.CurrentShadowState() )
		{
			// Get the shadow state in sync since it depends on SceneFogMode.
			ApplyFogMode( m_TransitionTable.CurrentShadowState()->m_FogMode, m_TransitionTable.CurrentShadowState()->m_SRGBWriteEnable, m_TransitionTable.CurrentShadowState()->m_bDisableFogGammaCorrection );
		}
	}
}

MaterialFogMode_t CShaderAPIDx8::GetSceneFogMode()
{
	return m_SceneFogMode;
}

MaterialFogMode_t CShaderAPIDx8::GetPixelFogMode()
{
	if( ShouldUsePixelFogForMode( m_SceneFogMode ) )
		return m_SceneFogMode;
	else
		return MATERIAL_FOG_NONE;
}

int CShaderAPIDx8::GetPixelFogCombo( void )
{
	if( (m_SceneFogMode != MATERIAL_FOG_NONE) && ShouldUsePixelFogForMode( m_SceneFogMode ) )
		return m_SceneFogMode - 1; //PIXELFOGTYPE dynamic combos are shifted down one. MATERIAL_FOG_NONE is simulated by range fog with rigged parameters. Gets rid of a dynamic combo across the ps2x set
	else	
		return MATERIAL_FOG_NONE;
}


static ConVar r_pixelfog( "r_pixelfog", "1" );

bool CShaderAPIDx8::ShouldUsePixelFogForMode( MaterialFogMode_t fogMode )
{
	if( fogMode == MATERIAL_FOG_NONE )
		return false;

	if( IsX360() || IsPosix() ) // Always use pixel fog on X360 and Posix
		return true;

	if( g_pHardwareConfig->Caps().m_nDXSupportLevel < 90 ) //pixel fog not available until at least ps2.0
		return false;

	switch( m_SceneFogMode )
	{
		case MATERIAL_FOG_LINEAR:
			return (g_pHardwareConfig->SupportsPixelShaders_2_b() && //lightmappedgeneric_ps20.fxc can't handle the instruction count
						r_pixelfog.GetBool()); //use pixel fog if preferred

		case MATERIAL_FOG_LINEAR_BELOW_FOG_Z:
			return true;
	};

	return false;
}

//-----------------------------------------------------------------------------
// Fog methods...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::FogMode( MaterialFogMode_t fogMode )
{
	bool bFogEnable;

	if ( IsX360() )
	{
		// FF fog not applicable on 360
		return;
	}

	m_DynamicState.m_SceneFog = fogMode;
	switch( fogMode )
	{
	default:
		Assert( 0 );
		// fall through

	case MATERIAL_FOG_NONE:
		bFogEnable = false;
		break;

	case MATERIAL_FOG_LINEAR:
		// use vertex fog to achieve linear range fog
		bFogEnable = true;
		break;

	case MATERIAL_FOG_LINEAR_BELOW_FOG_Z:
		// use pixel fog on 9.0 and greater for height fog
		bFogEnable = g_pHardwareConfig->Caps().m_nDXSupportLevel < 90;
		break;
	}

	if( ShouldUsePixelFogForMode( fogMode ) )
	{
		bFogEnable = false; //disable FF fog when doing fog in the pixel shader
	}

#if 0
	// HACK - do this to disable fog always
	bFogEnable = false;
	m_DynamicState.m_SceneFog = MATERIAL_FOG_NONE;
#endif

	// These two are always set to this, so don't bother setting them.
	// We are always using vertex fog.
//	SetRenderState( D3DRS_FOGTABLEMODE, D3DFOG_NONE );
//	SetRenderState( D3DRS_RANGEFOGENABLE, false );

	// Set fog enable if it's different than before.
	if ( bFogEnable != m_DynamicState.m_FogEnable )
	{
		SetSupportedRenderState( D3DRS_FOGENABLE, bFogEnable );
		m_DynamicState.m_FogEnable = bFogEnable;
	}
}


void CShaderAPIDx8::FogStart( float fStart )
{
	LOCK_SHADERAPI();
	if (fStart != m_DynamicState.m_FogStart)
	{
		// need to flush the dynamic buffer
		FlushBufferedPrimitives();

		SetRenderStateConstMacro( this, D3DRS_FOGSTART, *((DWORD*)(&fStart)));
		m_VertexShaderFogParams[0] = fStart;
		UpdateVertexShaderFogParams();
		m_DynamicState.m_FogStart = fStart;
	}
}

void CShaderAPIDx8::FogEnd( float fEnd )
{
	LOCK_SHADERAPI();
	if (fEnd != m_DynamicState.m_FogEnd)
	{
		// need to flush the dynamic buffer
		FlushBufferedPrimitives();

		SetRenderStateConstMacro( this, D3DRS_FOGEND, *((DWORD*)(&fEnd)));
		m_VertexShaderFogParams[1] = fEnd;
		UpdateVertexShaderFogParams();
		m_DynamicState.m_FogEnd = fEnd;
	}
}

void CShaderAPIDx8::SetFogZ( float fogZ )
{
	LOCK_SHADERAPI();
	if (fogZ != m_DynamicState.m_FogZ)
	{
		// need to flush the dynamic buffer
		FlushBufferedPrimitives();
		m_DynamicState.m_FogZ = fogZ;
		UpdateVertexShaderFogParams();
	}
}

void CShaderAPIDx8::FogMaxDensity( float flMaxDensity )
{
	LOCK_SHADERAPI();
	if (flMaxDensity != m_DynamicState.m_FogMaxDensity)
	{
		// need to flush the dynamic buffer
		FlushBufferedPrimitives();

//		SetRenderState(D3DRS_FOGDENSITY, *((DWORD*)(&flMaxDensity)));  // ??? do I need to to this ???
		m_flFogMaxDensity = flMaxDensity;
		UpdateVertexShaderFogParams();
		m_DynamicState.m_FogMaxDensity = flMaxDensity;
	}
}

void CShaderAPIDx8::GetFogDistances( float *fStart, float *fEnd, float *fFogZ )
{
	LOCK_SHADERAPI();
	if( fStart )
		*fStart = m_DynamicState.m_FogStart;

	if( fEnd )
		*fEnd = m_DynamicState.m_FogEnd;

	if( fFogZ )
		*fFogZ = m_DynamicState.m_FogZ;
}

void CShaderAPIDx8::SceneFogColor3ub( unsigned char r, unsigned char g, unsigned char b )
{
	LOCK_SHADERAPI();
	if( m_SceneFogColor[0] != r || m_SceneFogColor[1] != g || m_SceneFogColor[2] != b )
	{
		FlushBufferedPrimitives();
		m_SceneFogColor[0] = r;
		m_SceneFogColor[1] = g;
		m_SceneFogColor[2] = b;

		if ( m_TransitionTable.CurrentShadowState() )
		{
			ApplyFogMode( m_TransitionTable.CurrentShadowState()->m_FogMode, m_TransitionTable.CurrentShadowState()->m_SRGBWriteEnable, m_TransitionTable.CurrentShadowState()->m_bDisableFogGammaCorrection );
		}
	}
}

void CShaderAPIDx8::GetSceneFogColor( unsigned char *rgb )
{
	LOCK_SHADERAPI();
	rgb[0] = m_SceneFogColor[0];
	rgb[1] = m_SceneFogColor[1];
	rgb[2] = m_SceneFogColor[2];
}

void CShaderAPIDx8::GetSceneFogColor( unsigned char *r, unsigned char *g, unsigned char *b )
{
	*r = m_SceneFogColor[0];
	*g = m_SceneFogColor[1];
	*b = m_SceneFogColor[2];
}


//-----------------------------------------------------------------------------
// Gamma correction of fog color, or not...
//-----------------------------------------------------------------------------
D3DCOLOR CShaderAPIDx8::ComputeGammaCorrectedFogColor( unsigned char r, unsigned char g, unsigned char b, bool bSRGBWritesEnabled )
{
#ifdef _DEBUG
	if( g_pHardwareConfig->GetHDRType() == HDR_TYPE_FLOAT && !bSRGBWritesEnabled )
	{
//		Assert( 0 );
	}
#endif
	bool bLinearSpace =   g_pHardwareConfig->Caps().m_bFogColorAlwaysLinearSpace ||
						( bSRGBWritesEnabled && ( g_pHardwareConfig->Caps().m_bFogColorSpecifiedInLinearSpace || g_pHardwareConfig->GetHDRType() == HDR_TYPE_FLOAT ) );

	bool bScaleFogByToneMappingScale = g_pHardwareConfig->GetHDRType() == HDR_TYPE_INTEGER;

	float fr = ( r / 255.0f );
	float fg = ( g / 255.0f );
	float fb = ( b / 255.0f );
	if ( bLinearSpace )
	{
		fr = GammaToLinear( fr ); 
		fg = GammaToLinear( fg ); 
		fb = GammaToLinear( fb ); 
		if ( bScaleFogByToneMappingScale )
		{
			fr *= m_ToneMappingScale.x;		//
			fg *= m_ToneMappingScale.x;		// Linear
			fb *= m_ToneMappingScale.x;		//
		}
	}
	else if ( bScaleFogByToneMappingScale )
	{
		fr *= m_ToneMappingScale.w;			//
		fg *= m_ToneMappingScale.w;			// Gamma
		fb *= m_ToneMappingScale.w;			//
	}

	fr = min( fr, 1.0f );
	fg = min( fg, 1.0f );
	fb = min( fb, 1.0f );
	r = (int)( fr * 255 );
	g = (int)( fg * 255 );
	b = (int)( fb * 255 );
	return D3DCOLOR_ARGB( 255, r, g, b ); 
}


//-----------------------------------------------------------------------------
// Some methods chaining vertex + pixel shaders through to the shader manager
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetVertexShaderIndex( int vshIndex )
{
	ShaderManager()->SetVertexShaderIndex( vshIndex );
}

void CShaderAPIDx8::SetPixelShaderIndex( int pshIndex )
{
	ShaderManager()->SetPixelShaderIndex( pshIndex );
}

void CShaderAPIDx8::SyncToken( const char *pToken )
{
	LOCK_SHADERAPI();
	RECORD_COMMAND( DX8_SYNC_TOKEN, 1 );
	RECORD_STRING( pToken );
}


//-----------------------------------------------------------------------------
// Deals with vertex buffers
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DestroyVertexBuffers( bool bExitingLevel )
{
	LOCK_SHADERAPI();
	MeshMgr()->DestroyVertexBuffers( );
	// After a map is shut down, we switch to using smaller dynamic VBs
	// (VGUI shouldn't need much), so that we have more memory free during map loading
	m_nDynamicVBSize = bExitingLevel ? DYNAMIC_VERTEX_BUFFER_MEMORY_SMALL : DYNAMIC_VERTEX_BUFFER_MEMORY;
}

int CShaderAPIDx8::GetCurrentDynamicVBSize( void )
{
	return m_nDynamicVBSize;
}

FORCEINLINE void CShaderAPIDx8::SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs, bool bForce )
{
	Assert( numVecs > 0 );
	Assert( pVec );

	if ( IsPC() || IsPS3() )
	{
		Assert( var + numVecs <= g_pHardwareConfig->NumVertexShaderConstants() );

		if ( !bForce && memcmp( pVec, &m_DynamicState.m_pVectorVertexShaderConstant[var], numVecs * 4 * sizeof( float ) ) == 0 )
			return;

		Dx9Device()->SetVertexShaderConstantF( var, pVec, numVecs );
		memcpy( &m_DynamicState.m_pVectorVertexShaderConstant[var], pVec, numVecs * 4 * sizeof(float) );
	}
	else
	{
		Assert( var + numVecs <= g_pHardwareConfig->NumVertexShaderConstants() );
	}

	if ( IsX360() && var + numVecs > m_MaxVectorVertexShaderConstant )
			m_MaxVectorVertexShaderConstant = var + numVecs;

	memcpy( &m_DesiredState.m_pVectorVertexShaderConstant[var], pVec, numVecs * 4 * sizeof(float) );	
}


//-----------------------------------------------------------------------------
// Sets the constant register for vertex and pixel shaders
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetVertexShaderConstant( int var, float const* pVec, int numVecs, bool bForce )
{
	SetVertexShaderConstantInternal( var, pVec, numVecs, bForce );
}

//-----------------------------------------------------------------------------
// Sets the boolean registers for vertex shader control flow
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetBooleanVertexShaderConstant( int var, int const* pVec, int numBools, bool bForce )
{
	Assert( pVec );
	Assert( var + numBools <= g_pHardwareConfig->NumBooleanVertexShaderConstants() );

	if ( IsPC() && g_pHardwareConfig->GetDXSupportLevel() < 90 )
	{
		return;
	}

	if ( IsPC() && !bForce && memcmp( pVec, &m_DesiredState.m_pBooleanVertexShaderConstant[var], numBools * sizeof( BOOL ) ) == 0 )
	{
		return;
	}

	if ( IsPC() )
	{
		Dx9Device()->SetVertexShaderConstantB( var, pVec, numBools );
		memcpy( &m_DynamicState.m_pBooleanVertexShaderConstant[var], pVec, numBools * sizeof(BOOL) );
	}

	memcpy( &m_DesiredState.m_pBooleanVertexShaderConstant[var], pVec, numBools * sizeof(BOOL) );

	if ( IsX360() && var + numBools > m_MaxBooleanVertexShaderConstant )
	{
		m_MaxBooleanVertexShaderConstant = var + numBools;
		Assert( m_MaxBooleanVertexShaderConstant <= 16 );
	}
}


//-----------------------------------------------------------------------------
// Sets the integer registers for vertex shader control flow
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetIntegerVertexShaderConstant( int var, int const* pVec, int numIntVecs, bool bForce )
{
	Assert( pVec );
	Assert( var + numIntVecs <= g_pHardwareConfig->NumIntegerVertexShaderConstants() );

	if ( IsPC() && g_pHardwareConfig->GetDXSupportLevel() < 90 )
	{
		return;
	}

	if ( IsPC() && !bForce && memcmp( pVec, &m_DesiredState.m_pIntegerVertexShaderConstant[var], numIntVecs * sizeof( IntVector4D ) ) == 0 )
	{
		return;
	}

	if ( IsPC() )
	{
		Dx9Device()->SetVertexShaderConstantI( var, pVec, numIntVecs );
		memcpy( &m_DynamicState.m_pIntegerVertexShaderConstant[var], pVec, numIntVecs * sizeof(IntVector4D) );
	}
	memcpy( &m_DesiredState.m_pIntegerVertexShaderConstant[var], pVec, numIntVecs * sizeof(IntVector4D) );

	if ( IsX360() && var + numIntVecs > m_MaxIntegerVertexShaderConstant )
	{
		m_MaxIntegerVertexShaderConstant = var + numIntVecs;
		Assert( m_MaxIntegerVertexShaderConstant <= 16 );
	}
}

FORCEINLINE void CShaderAPIDx8::SetPixelShaderConstantInternal( int nStartConst, float const* pValues, int nNumConsts, bool bForce )
{
	Assert( nStartConst + nNumConsts <= g_pHardwareConfig->NumPixelShaderConstants() );

	if ( IsPC() || IsPS3() )
	{
		if ( !bForce )
		{
			DWORD* pSrc = (DWORD*)pValues;
			DWORD* pDst = (DWORD*)&m_DesiredState.m_pVectorPixelShaderConstant[nStartConst];
			while( nNumConsts && ( pSrc[0] == pDst[0] ) && ( pSrc[1] == pDst[1] ) && ( pSrc[2] == pDst[2] ) && ( pSrc[3] == pDst[3] ) )
			{
				pSrc += 4;
				pDst += 4;
				nNumConsts--;
				nStartConst++;
			}
			if ( !nNumConsts )
				return;
			pValues = reinterpret_cast< float const * >( pSrc );
		}
					
		Dx9Device()->SetPixelShaderConstantF( nStartConst, pValues, nNumConsts );
		memcpy( &m_DynamicState.m_pVectorPixelShaderConstant[nStartConst], pValues, nNumConsts * 4 * sizeof(float) );
	}

	if ( IsX360() && nStartConst + nNumConsts > m_MaxVectorPixelShaderConstant )
	{
		m_MaxVectorPixelShaderConstant = nStartConst + nNumConsts;
		Assert( m_MaxVectorPixelShaderConstant <= 32 );
		if ( m_MaxVectorPixelShaderConstant > 32 )
		{
			// NOTE!  There really are 224 pixel shader constants on the 360, but we do an optimization that only blasts the first 32 always.
			Error( "Don't use more then the first 32 pixel shader constants on the 360!" );
		}
	}

	memcpy( &m_DesiredState.m_pVectorPixelShaderConstant[nStartConst], pValues, nNumConsts * 4 * sizeof(float) );
}

void CShaderAPIDx8::SetPixelShaderConstant( int var, float const* pVec, int numVecs, bool bForce )
{
	SetPixelShaderConstantInternal( var, pVec, numVecs, bForce );
}

template<class T> FORCEINLINE T GetData( uint8 const *pData )
{
	return * ( reinterpret_cast< T const *>( pData ) );
}



void CShaderAPIDx8::SetStandardTextureHandle( StandardTextureId_t nId, ShaderAPITextureHandle_t nHandle )
{
	Assert( nId < ARRAYSIZE( m_StdTextureHandles ) );
	m_StdTextureHandles[nId] = nHandle;
}

void CShaderAPIDx8::ExecuteCommandBuffer( uint8 *pCmdBuf )
{
	uint8 *pReturnStack[20];
	uint8 **pSP = &pReturnStack[ARRAYSIZE(pReturnStack)];
	uint8 *pLastCmd;
	for(;;)
	{
		uint8 *pCmd=pCmdBuf;
		int nCmd = GetData<int>( pCmdBuf );
		switch( nCmd )
		{
			case CBCMD_END:
			{
				if ( pSP == &pReturnStack[ARRAYSIZE(pReturnStack)] )
					return;
				else
				{
					// pop pc
					pCmdBuf = *( pSP ++ );
					break;
				}
			}

			case CBCMD_JUMP:
				pCmdBuf = GetData<uint8 *>(  pCmdBuf + sizeof( int ) );
				break;
				
			case CBCMD_JSR:
			{
				Assert( pSP > &(pReturnStack[0] ) );
// 				*(--pSP ) = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
// 				pCmdBuf = GetData<uint8 *>(  pCmdBuf + sizeof( int ) );
				ExecuteCommandBuffer( GetData<uint8 *>(  pCmdBuf + sizeof( int ) ) );
				pCmdBuf = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
				break;
			}

			case CBCMD_SET_PIXEL_SHADER_FLOAT_CONST:
			{
				int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
				int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
				float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
				pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
				SetPixelShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
				break;
			}

			case CBCMD_SETPIXELSHADERFOGPARAMS:
			{
				int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
				pCmdBuf += 2 * sizeof( int );
				SetPixelShaderFogParams( nReg );			// !! speed fixme
				break;
			}
			case CBCMD_STORE_EYE_POS_IN_PSCONST:
			{
				int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
				pCmdBuf += 2 * sizeof( int );
				SetPixelShaderConstantInternal( nReg, m_WorldSpaceCameraPositon.Base(), 1, false  );
				break;
			}

			case CBCMD_COMMITPIXELSHADERLIGHTING:
			{
				int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
				pCmdBuf += 2 * sizeof( int );
				CommitPixelShaderLighting( nReg );
				break;
			}

			case CBCMD_SETPIXELSHADERSTATEAMBIENTLIGHTCUBE:
			{
				int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
				pCmdBuf += 2 * sizeof( int );
				float *pCubeBase = m_DynamicState.m_AmbientLightCube[0].Base();
				SetPixelShaderConstantInternal( nReg, pCubeBase, 6, false );
				break;
			}

			case CBCMD_SETAMBIENTCUBEDYNAMICSTATEVERTEXSHADER:
			{
				pCmdBuf +=sizeof( int );
				SetVertexShaderStateAmbientLightCube();
				break;
			}

			case CBCMD_SET_DEPTH_FEATHERING_CONST:
			{
				int nConst = GetData<int>( pCmdBuf + sizeof( int ) );
				float fDepthBlendScale = GetData<float>( pCmdBuf + 2 * sizeof( int ) );
				pCmdBuf += 2 * sizeof( int ) + sizeof( float );
				SetDepthFeatheringPixelShaderConstant( nConst, fDepthBlendScale );
				break;
			}

			case CBCMD_SET_VERTEX_SHADER_FLOAT_CONST:
			{
				int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
				int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
				float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
				pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
				SetVertexShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
				break;
			}

			case CBCMD_BIND_STANDARD_TEXTURE:
			{
				int nSampler = GetData<int>( pCmdBuf + sizeof( int ) );
				int nTextureID = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
				pCmdBuf += 3 * sizeof( int );
				if ( m_StdTextureHandles[nTextureID] != INVALID_SHADERAPI_TEXTURE_HANDLE )
				{
					BindTexture( (Sampler_t) nSampler, m_StdTextureHandles[nTextureID] );
				}
				else
				{
					ShaderUtil()->BindStandardTexture( (Sampler_t) nSampler, (StandardTextureId_t ) nTextureID );
				}
				break;
			}

			case CBCMD_BIND_SHADERAPI_TEXTURE_HANDLE:
			{
				int nSampler = GetData<int>( pCmdBuf + sizeof( int ) );
				ShaderAPITextureHandle_t hTexture = GetData<ShaderAPITextureHandle_t>( pCmdBuf + 2 * sizeof( int ) );
				Assert( hTexture != INVALID_SHADERAPI_TEXTURE_HANDLE );
				pCmdBuf += 2 * sizeof( int ) + sizeof( ShaderAPITextureHandle_t );
				BindTexture( (Sampler_t) nSampler, hTexture );
				break;
			}

			case CBCMD_SET_PSHINDEX:
			{
				int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
				ShaderManager()->SetPixelShaderIndex( nIdx );
				pCmdBuf += 2 * sizeof( int );
				break;
			}

			case CBCMD_SET_VSHINDEX:
			{
				int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
				ShaderManager()->SetVertexShaderIndex( nIdx );
				pCmdBuf += 2 * sizeof( int );
				break;
			}



#ifndef NDEBUG
			default:
			{
				Assert(0);
			}
#endif


		}
		pLastCmd = pCmd;
	}
}
	

//-----------------------------------------------------------------------------
// Sets the boolean registers for pixel shader control flow
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetBooleanPixelShaderConstant( int var, int const* pVec, int numBools, bool bForce )
{
	Assert( pVec );
	Assert( var + numBools <= g_pHardwareConfig->NumBooleanPixelShaderConstants() );

	if ( IsPC() && !bForce && memcmp( pVec, &m_DesiredState.m_pBooleanPixelShaderConstant[var], numBools * sizeof( BOOL ) ) == 0 )
	{
		return;
	}

	if ( IsPC() )
	{
		Dx9Device()->SetPixelShaderConstantB( var, pVec, numBools );
		memcpy( &m_DynamicState.m_pBooleanPixelShaderConstant[var], pVec, numBools * sizeof(BOOL) );
	}

	memcpy( &m_DesiredState.m_pBooleanPixelShaderConstant[var], pVec, numBools * sizeof(BOOL) );

	if ( IsX360() && var + numBools > m_MaxBooleanPixelShaderConstant )
	{
		m_MaxBooleanPixelShaderConstant = var + numBools;
		Assert( m_MaxBooleanPixelShaderConstant <= 16 );
	}
}


//-----------------------------------------------------------------------------
// Sets the integer registers for pixel shader control flow
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetIntegerPixelShaderConstant( int var, int const* pVec, int numIntVecs, bool bForce )
{
	Assert( pVec );
	Assert( var + numIntVecs <= g_pHardwareConfig->NumIntegerPixelShaderConstants() );

	if ( IsPC() && !bForce && memcmp( pVec, &m_DesiredState.m_pIntegerPixelShaderConstant[var], numIntVecs * sizeof( IntVector4D ) ) == 0 )
	{
		return;
	}

	if ( IsPC() )
	{
		Dx9Device()->SetPixelShaderConstantI( var, pVec, numIntVecs );
		memcpy( &m_DynamicState.m_pIntegerPixelShaderConstant[var], pVec, numIntVecs * sizeof(IntVector4D) );
	}

	memcpy( &m_DesiredState.m_pIntegerPixelShaderConstant[var], pVec, numIntVecs * sizeof(IntVector4D) );

	if ( IsX360() && var + numIntVecs > m_MaxIntegerPixelShaderConstant )
	{
		m_MaxIntegerPixelShaderConstant = var + numIntVecs;
		Assert( m_MaxBooleanPixelShaderConstant <= 16 );
	}
}


void CShaderAPIDx8::InvalidateDelayedShaderConstants( void )
{
	m_DelayedShaderConstants.Invalidate();
}

//-----------------------------------------------------------------------------
//
// Methods dealing with texture stage state
//
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// Gets the texture associated with a texture state...
//-----------------------------------------------------------------------------

inline IDirect3DBaseTexture* CShaderAPIDx8::GetD3DTexture( ShaderAPITextureHandle_t hTexture )
{
	if ( hTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
	{
		return NULL;
	}

	AssertValidTextureHandle( hTexture );

	Texture_t& tex = GetTexture( hTexture );
	if ( tex.m_NumCopies == 1 )
	{
		return tex.GetTexture();
	}
	else
	{
		return tex.GetTexture( tex.m_CurrentCopy );
	}
}

//-----------------------------------------------------------------------------
// Inline methods
//-----------------------------------------------------------------------------
inline ShaderAPITextureHandle_t CShaderAPIDx8::GetModifyTextureHandle() const
{
	return m_ModifyTextureHandle;
}

inline IDirect3DBaseTexture* CShaderAPIDx8::GetModifyTexture()
{
	return CShaderAPIDx8::GetD3DTexture( m_ModifyTextureHandle );
}

void CShaderAPIDx8::SetModifyTexture( IDirect3DBaseTexture* pTex )
{
	if ( m_ModifyTextureHandle == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;
	
	Texture_t& tex = GetTexture( m_ModifyTextureHandle );
	if ( tex.m_NumCopies == 1 )
	{
		tex.SetTexture( pTex );
	}
	else
	{
		tex.SetTexture( tex.m_CurrentCopy, pTex );
	}
}

inline ShaderAPITextureHandle_t CShaderAPIDx8::GetBoundTextureBindId( Sampler_t sampler ) const
{
	return SamplerState( sampler ).m_BoundTexture;
}

inline bool CShaderAPIDx8::WouldBeOverTextureLimit( ShaderAPITextureHandle_t hTexture )
{
	if ( IsPC() )
	{
		if ( mat_texture_limit.GetInt() < 0 )
			return false;

		Texture_t &tex = GetTexture( hTexture );
		if ( tex.m_LastBoundFrame == m_CurrentFrame )
			return false;

		return m_nTextureMemoryUsedLastFrame + tex.GetMemUsage() > (mat_texture_limit.GetInt() * 1024);
	}
	return false;
}


#define SETSAMPLESTATEANDMIRROR( sampler, samplerState, state_type, mirror_field, value )	\
	if ( samplerState.mirror_field != value )												\
	{																						\
		samplerState.mirror_field = value;													\
		::SetSamplerState( g_pD3DDevice, sampler, state_type, value );	\
	}

#define SETSAMPLESTATEANDMIRROR_FLOAT( sampler, samplerState, state_type, mirror_field, value )	\
	if ( samplerState.mirror_field != value )												\
		{																					\
		samplerState.mirror_field = value;													\
		::SetSamplerState( g_pD3DDevice, sampler, state_type, *(DWORD*)&value );	\
		}


//-----------------------------------------------------------------------------
// Sets state on the board related to the texture state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetTextureState( Sampler_t sampler, ShaderAPITextureHandle_t hTexture, bool force )
{
	// Get the dynamic texture info
	SamplerState_t &samplerState = SamplerState( sampler );

	// Set the texture state, but only if it changes
	if ( ( samplerState.m_BoundTexture == hTexture ) && !force )
		return;

	// Disabling texturing
	if ( hTexture == INVALID_SHADERAPI_TEXTURE_HANDLE || WouldBeOverTextureLimit( hTexture ) )
	{
		Dx9Device()->SetTexture( sampler, 0 );
		return;
	}

	samplerState.m_BoundTexture = hTexture;

	// Don't set this if we're disabled
	if ( !samplerState.m_TextureEnable )
		return;

	RECORD_COMMAND( DX8_SET_TEXTURE, 3 );
	RECORD_INT( stage );
	RECORD_INT( hTexture );
	RECORD_INT( GetTexture( hTexture).m_CurrentCopy );

	IDirect3DBaseTexture *pTexture = CShaderAPIDx8::GetD3DTexture( hTexture );

#if defined( _X360 )
	DWORD linearFormatBackup = pTexture->Format.dword[0]; 
	if ( samplerState.m_SRGBReadEnable ) 
	{
		// convert to srgb format for the bind. This effectively emulates the old srgb read sampler state
		pTexture->Format.SignX = 
			pTexture->Format.SignY = 
			pTexture->Format.SignZ = 3; 
	}
#endif

	Dx9Device()->SetTexture( sampler, pTexture );

#if defined( _X360 )
	// put the format back in linear space
	pTexture->Format.dword[0] = linearFormatBackup;
#endif

	Texture_t &tex = GetTexture( hTexture );
	if ( tex.m_LastBoundFrame != m_CurrentFrame )
	{
		tex.m_LastBoundFrame = m_CurrentFrame;
		tex.m_nTimesBoundThisFrame = 0;

		if ( tex.m_pTextureGroupCounterFrame )
		{
			// Update the per-frame texture group counter.
			*tex.m_pTextureGroupCounterFrame += tex.GetMemUsage();
		}

		// Track memory usage.
		m_nTextureMemoryUsedLastFrame += tex.GetMemUsage();
	}

	if ( !m_bDebugTexturesRendering )
		++tex.m_nTimesBoundThisFrame;

	tex.m_nTimesBoundMax = MAX( tex.m_nTimesBoundMax, tex.m_nTimesBoundThisFrame );

	static MaterialSystem_Config_t &materialSystemConfig = ShaderUtil()->GetConfig();

	D3DTEXTUREFILTERTYPE minFilter = tex.m_MinFilter;
	D3DTEXTUREFILTERTYPE magFilter = tex.m_MagFilter;
	D3DTEXTUREFILTERTYPE mipFilter = tex.m_MipFilter;
	int finestMipmapLevel		   = tex.m_FinestMipmapLevel;
	float lodBias				   = tex.m_LodBias;

	if ( materialSystemConfig.bMipMapTextures == 0 )
	{
		mipFilter = D3DTEXF_NONE;
	} 

	if ( materialSystemConfig.bFilterTextures == 0 && tex.m_NumLevels > 1 ) 
	{
		minFilter = D3DTEXF_NONE;
		magFilter = D3DTEXF_NONE;
		mipFilter = D3DTEXF_POINT;
	}

	D3DTEXTUREADDRESS uTexWrap = tex.m_UTexWrap;
	D3DTEXTUREADDRESS vTexWrap = tex.m_VTexWrap;
	D3DTEXTUREADDRESS wTexWrap = tex.m_WTexWrap;

	// For now do this the old way on OSX since the dxabstract layer doesn't support SetSamplerStates
	// ###OSX### punting on OSX for now
#if DX_TO_GL_ABSTRACTION && !OSX
	if ( ( samplerState.m_MinFilter != minFilter ) || ( samplerState.m_MagFilter != magFilter ) || ( samplerState.m_MipFilter != mipFilter ) ||
		( samplerState.m_UTexWrap != uTexWrap ) || ( samplerState.m_VTexWrap != vTexWrap ) || ( samplerState.m_WTexWrap != wTexWrap ) || 
		( samplerState.m_FinestMipmapLevel != finestMipmapLevel ) || ( samplerState.m_LodBias != lodBias ) )
	{
		samplerState.m_UTexWrap = uTexWrap;
		samplerState.m_VTexWrap = vTexWrap;
		samplerState.m_WTexWrap = wTexWrap;
		samplerState.m_MinFilter = minFilter;
		samplerState.m_MagFilter = magFilter;
		samplerState.m_MipFilter = mipFilter;
		samplerState.m_FinestMipmapLevel = finestMipmapLevel;
		samplerState.m_LodBias = lodBias;
		Dx9Device()->SetSamplerStates( sampler, uTexWrap, vTexWrap, wTexWrap, minFilter, magFilter, mipFilter, finestMipmapLevel, lodBias );
	}
#else
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_ADDRESSU, m_UTexWrap, uTexWrap );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_ADDRESSV, m_VTexWrap, vTexWrap );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_ADDRESSW, m_WTexWrap, wTexWrap );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_MINFILTER, m_MinFilter, minFilter );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_MAGFILTER, m_MagFilter, magFilter );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_MIPFILTER, m_MipFilter, mipFilter );
	SETSAMPLESTATEANDMIRROR( sampler, samplerState, D3DSAMP_MAXMIPLEVEL, m_FinestMipmapLevel, finestMipmapLevel );
	SETSAMPLESTATEANDMIRROR_FLOAT( sampler, samplerState, D3DSAMP_MIPMAPLODBIAS, m_LodBias, lodBias );
	
#endif
}

void CShaderAPIDx8::BindTexture( Sampler_t sampler, ShaderAPITextureHandle_t textureHandle )
{
	LOCK_SHADERAPI();
	SetTextureState( sampler, textureHandle );
}

void CShaderAPIDx8::BindVertexTexture( VertexTextureSampler_t nStage, ShaderAPITextureHandle_t textureHandle )
{
	Assert( g_pMaterialSystemHardwareConfig->GetVertexTextureCount() != 0 );
	LOCK_SHADERAPI();
	ADD_VERTEX_TEXTURE_FUNC( COMMIT_PER_PASS, COMMIT_VERTEX_SHADER, 
		CommitVertexTextures, nStage, m_BoundTexture, textureHandle );
}


//-----------------------------------------------------------------------------
// Texture allocation/deallocation
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// Computes stats info for a texture
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ComputeStatsInfo( ShaderAPITextureHandle_t hTexture, bool isCubeMap, bool isVolumeTexture )
{
	Texture_t &textureData = GetTexture( hTexture );

	textureData.m_SizeBytes = 0;
	textureData.m_SizeTexels = 0;
	textureData.m_LastBoundFrame = -1;
	if ( IsX360() )
	{
		textureData.m_nTimesBoundThisFrame = 0;
	}

	IDirect3DBaseTexture* pD3DTex = CShaderAPIDx8::GetD3DTexture( hTexture );

	if ( IsPC() || !IsX360() )
	{
		if ( isCubeMap )
		{
			IDirect3DCubeTexture* pTex = static_cast<IDirect3DCubeTexture*>(pD3DTex);
			if ( !pTex )
			{
				Assert( 0 );
				return;
			}

			int numLevels = pTex->GetLevelCount();
			for (int i = 0; i < numLevels; ++i)
			{
				D3DSURFACE_DESC desc;
				HRESULT hr = pTex->GetLevelDesc( i, &desc );
				Assert( !FAILED(hr) );
				textureData.m_SizeBytes += 6 * ImageLoader::GetMemRequired( desc.Width, desc.Height, 1, textureData.GetImageFormat(), false );
				textureData.m_SizeTexels += 6 * desc.Width * desc.Height;
			}
		}
		else if ( isVolumeTexture )
		{
			IDirect3DVolumeTexture9* pTex = static_cast<IDirect3DVolumeTexture9*>(pD3DTex);
			if ( !pTex )
			{
				Assert( 0 );
				return;
			}
			int numLevels = pTex->GetLevelCount();
			for (int i = 0; i < numLevels; ++i)
			{
				D3DVOLUME_DESC desc;
				HRESULT hr = pTex->GetLevelDesc( i, &desc );
				Assert( !FAILED( hr ) );
				textureData.m_SizeBytes += ImageLoader::GetMemRequired( desc.Width, desc.Height, desc.Depth, textureData.GetImageFormat(), false );
				textureData.m_SizeTexels += desc.Width * desc.Height;
			}
		}
		else
		{
			IDirect3DTexture* pTex = static_cast<IDirect3DTexture*>(pD3DTex);
			if ( !pTex )
			{
				Assert( 0 );
				return;
			}

			int numLevels = pTex->GetLevelCount();
			for (int i = 0; i < numLevels; ++i)
			{
				D3DSURFACE_DESC desc;
				HRESULT hr = pTex->GetLevelDesc( i, &desc );
				Assert( !FAILED( hr ) );
				textureData.m_SizeBytes += ImageLoader::GetMemRequired( desc.Width, desc.Height, 1, textureData.GetImageFormat(), false );
				textureData.m_SizeTexels += desc.Width * desc.Height;
			}
		}
	}

#if defined( _X360 )
	// 360 uses gpu storage size (which accounts for page alignment bloat), not format size
	textureData.m_SizeBytes = g_TextureHeap.GetSize( pD3DTex );
#endif
}

static D3DFORMAT ComputeFormat( IDirect3DBaseTexture* pTexture, bool isCubeMap )
{
	Assert( pTexture );
	D3DSURFACE_DESC desc;
	if (isCubeMap)
	{
		IDirect3DCubeTexture* pTex = static_cast<IDirect3DCubeTexture*>(pTexture);
		HRESULT hr = pTex->GetLevelDesc( 0, &desc );
		Assert( !FAILED(hr) );
	}
	else
	{
		IDirect3DTexture* pTex = static_cast<IDirect3DTexture*>(pTexture);
		HRESULT hr = pTex->GetLevelDesc( 0, &desc );
		Assert( !FAILED(hr) );
	}
	return desc.Format;
}

ShaderAPITextureHandle_t CShaderAPIDx8::CreateDepthTexture( 
	ImageFormat renderTargetFormat, 
	int width, 
	int height, 
	const char *pDebugName,
	bool bTexture )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t i = CreateTextureHandle();
	Texture_t *pTexture = &GetTexture( i );

	pTexture->m_Flags = Texture_t::IS_ALLOCATED;
	if( bTexture )
	{
		pTexture->m_Flags |= Texture_t::IS_DEPTH_STENCIL_TEXTURE;
	}
	else
	{
		pTexture->m_Flags |= Texture_t::IS_DEPTH_STENCIL;
	}

	pTexture->m_DebugName = pDebugName;
	pTexture->m_Width = width;
	pTexture->m_Height = height;
	pTexture->m_Depth = 1;			// fake
	pTexture->m_Count = 1;			// created single texture
	pTexture->m_CountIndex = 0;		// created single texture
	pTexture->m_CreationFlags = 0;	// fake
	pTexture->m_NumCopies = 1;
	pTexture->m_CurrentCopy = 0;

	ImageFormat renderFormat = FindNearestSupportedFormat( renderTargetFormat, false, true, false );
#if defined( _X360 )
	D3DFORMAT nDepthFormat = ReverseDepthOnX360() ? D3DFMT_D24FS8 : D3DFMT_D24S8;
#else
	D3DFORMAT nDepthFormat = m_bUsingStencil ? D3DFMT_D24S8 : D3DFMT_D24X8;
#endif
	D3DFORMAT format = FindNearestSupportedDepthFormat( m_nAdapter, m_AdapterFormat, renderFormat, nDepthFormat );
	D3DMULTISAMPLE_TYPE multisampleType = D3DMULTISAMPLE_NONE;

	pTexture->m_NumLevels = 1;
	pTexture->m_SizeTexels = width * height;
	pTexture->m_SizeBytes = ImageLoader::GetMemRequired( width, height, 1, renderFormat, false );

	RECORD_COMMAND( DX8_CREATE_DEPTH_TEXTURE, 5 );
	RECORD_INT( i );
	RECORD_INT( width );
	RECORD_INT( height );
	RECORD_INT( format );
	RECORD_INT( multisampleType );

	HRESULT hr;
	if ( !bTexture )
	{
#if defined( _X360 )
		int backWidth, backHeight;
		ShaderAPI()->GetBackBufferDimensions( backWidth, backHeight );
		D3DFORMAT backBufferFormat = ImageLoader::ImageFormatToD3DFormat( g_pShaderDevice->GetBackBufferFormat() );
		// immediately follows back buffer in EDRAM
		D3DSURFACE_PARAMETERS surfParameters;
		surfParameters.Base = 2*XGSurfaceSize( backWidth, backHeight, backBufferFormat, D3DMULTISAMPLE_NONE );
		surfParameters.ColorExpBias = 0;
		surfParameters.HierarchicalZBase = 0;
		hr = Dx9Device()->CreateDepthStencilSurface(
			width, height, format, multisampleType, 0, TRUE, &pTexture->GetDepthStencilSurface(), &surfParameters );
#else
		hr = Dx9Device()->CreateDepthStencilSurface(
			width, height, format, multisampleType, 0, TRUE, &pTexture->GetDepthStencilSurface(), NULL );
#endif
	}
	else
	{
		IDirect3DTexture9 *pTex;
		hr = Dx9Device()->CreateTexture( width, height, 1, D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &pTex, NULL );
		pTexture->SetTexture( pTex );
	}

    if ( FAILED( hr ) )
	{
		switch( hr )
		{
		case D3DERR_INVALIDCALL:
			Warning( "ShaderAPIDX8::CreateDepthStencilSurface: D3DERR_INVALIDCALL\n" );
			break;
		case D3DERR_OUTOFVIDEOMEMORY:
			Warning( "ShaderAPIDX8::CreateDepthStencilSurface: D3DERR_OUTOFVIDEOMEMORY\n" );
			break;
		default:
			break;
		}
		Assert( 0 );
	}

#ifdef _XBOX
	D3DSURFACE_DESC desc;
	hr = pTexture->GetDepthStencilSurface()->GetDesc( &desc );
	Assert( !FAILED( hr ) );

	pTexture->m_nTimesBoundThisFrame = 0;
	pTexture->m_StaticSizeBytes  = desc.Size;
	pTexture->m_DynamicSizeBytes = 0;
	pTexture->m_DebugName        = pDebugName;
	pTexture->m_TextureGroupName = TEXTURE_GROUP_RENDER_TARGET;
	pTexture->SetImageFormat( IMAGE_FORMAT_UNKNOWN );
#endif

	return i;
}


// FIXME!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Could keep a free-list for this instead of linearly searching.  We
// don't create textures all the time, so this is probably fine for now.
// FIXME!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ShaderAPITextureHandle_t CShaderAPIDx8::CreateTextureHandle( void )
{
	ShaderAPITextureHandle_t handle;
	CreateTextureHandles( &handle, 1 );
	return handle;
}

void CShaderAPIDx8::CreateTextureHandles( ShaderAPITextureHandle_t *handles, int count )
{
	TM_ZONE_DEFAULT( TELEMETRY_LEVEL0 );

	if ( count <= 0 )
		return;

	MEM_ALLOC_CREDIT();

	int idxCreating = 0;
	ShaderAPITextureHandle_t hTexture;

	{
		tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - Search", __FUNCTION__ );

		for ( hTexture = m_Textures.Head(); hTexture != m_Textures.InvalidIndex(); hTexture = m_Textures.Next( hTexture ) )
		{
			if ( !( m_Textures[hTexture].m_Flags & Texture_t::IS_ALLOCATED ) )
			{
				handles[ idxCreating ++ ] = hTexture;
				if ( idxCreating >= count )
					return;
			}
		}
	}

	tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - Add", __FUNCTION__ );
	while ( idxCreating < count )
		handles[ idxCreating ++ ] = m_Textures.AddToTail();
}

//-----------------------------------------------------------------------------
// Creates a lovely texture
//-----------------------------------------------------------------------------

ShaderAPITextureHandle_t CShaderAPIDx8::CreateTexture(
	int			width, 
	int			height, 
	int			depth,
	ImageFormat dstImageFormat, 
	int			numMipLevels, 
	int			numCopies, 
	int			creationFlags,
	const char *pDebugName,
	const char *pTextureGroupName )
{
	ShaderAPITextureHandle_t handle = 0;
	CreateTextures( &handle, 1, width, height, depth, dstImageFormat, numMipLevels, numCopies, creationFlags, pDebugName, pTextureGroupName );
	return handle;
}

void CShaderAPIDx8::CreateTextures(
	ShaderAPITextureHandle_t *pHandles,
	int			count,
	int			width, 
	int			height, 
	int			depth,
	ImageFormat dstImageFormat, 
	int			numMipLevels, 
	int			numCopies, 
	int			creationFlags,
	const char *pDebugName,
	const char *pTextureGroupName )
{
	TM_ZONE_DEFAULT( TELEMETRY_LEVEL0 );

	LOCK_SHADERAPI();

	tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - PostLock", __FUNCTION__ );

	Assert( this == g_pShaderAPI );

	if ( depth == 0 )
	{
		depth = 1;
	}

	bool isCubeMap = (creationFlags & TEXTURE_CREATE_CUBEMAP) != 0;
	bool isRenderTarget = (creationFlags & TEXTURE_CREATE_RENDERTARGET) != 0;
	bool managed = (creationFlags & TEXTURE_CREATE_MANAGED) != 0;
	bool isDepthBuffer = (creationFlags & TEXTURE_CREATE_DEPTHBUFFER) != 0;
	bool isDynamic = (creationFlags & TEXTURE_CREATE_DYNAMIC) != 0;
	bool isSRGB = (creationFlags & TEXTURE_CREATE_SRGB) != 0;			// for Posix/GL only... not used here ?

#if defined(IS_WINDOWS_PC) && defined(SHADERAPIDX9)
	extern bool g_ShaderDeviceUsingD3D9Ex;
	if ( g_ShaderDeviceUsingD3D9Ex && managed )
	{
		// Managed textures aren't available under D3D9Ex, but we never lose
		// texture data, so it's ok to use the default pool. Really. We can't
		// lock default-pool textures like we normally would to upload, but we
		// have special logic to blit full updates via D3DX helper functions
		// in D3D9Ex mode (see texturedx8.cpp)
		managed = false;
		creationFlags &= ~TEXTURE_CREATE_MANAGED;
	}
#endif

	// Can't be both managed + dynamic. Dynamic is an optimization, but 
	// if it's not managed, then we gotta do special client-specific stuff
	// So, managed wins out!
	if ( managed )
	{
		creationFlags &= ~TEXTURE_CREATE_DYNAMIC;
		isDynamic = false;
	}

	

	// Create a set of texture handles
	CreateTextureHandles( pHandles, count );
	Texture_t **arrTxp = ( Texture_t ** ) stackalloc( count * sizeof( Texture_t * ) );

	unsigned short usSetFlags = 0;
	usSetFlags |= ( IsPosix() || ( creationFlags & (TEXTURE_CREATE_DYNAMIC | TEXTURE_CREATE_MANAGED) ) ) ? Texture_t::IS_LOCKABLE : 0;
	usSetFlags |= ( creationFlags & TEXTURE_CREATE_VERTEXTEXTURE) ? Texture_t::IS_VERTEX_TEXTURE : 0;
#if defined( _X360 )
	usSetFlags |= ( creationFlags & TEXTURE_CREATE_RENDERTARGET ) ? Texture_t::IS_RENDER_TARGET : 0;
	usSetFlags |= ( creationFlags & TEXTURE_CREATE_CANCONVERTFORMAT ) ? Texture_t::CAN_CONVERT_FORMAT : 0;
#endif

	tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - CreateFrames", __FUNCTION__ );

	for ( int idxFrame = 0; idxFrame < count; ++ idxFrame )
	{
		arrTxp[ idxFrame ] = &GetTexture( pHandles[ idxFrame ] );
		Texture_t *pTexture = arrTxp[ idxFrame ];
		pTexture->m_Flags = Texture_t::IS_ALLOCATED;
		pTexture->m_DebugName = pDebugName;
		pTexture->m_Width = width;
		pTexture->m_Height = height;
		pTexture->m_Depth = depth;
		pTexture->m_Count = count;
		pTexture->m_CountIndex = idxFrame;

		pTexture->m_CreationFlags = creationFlags;
		pTexture->m_Flags |= usSetFlags;

		RECORD_COMMAND( DX8_CREATE_TEXTURE, 12 );
		RECORD_INT( textureHandle );
		RECORD_INT( width );
		RECORD_INT( height );
		RECORD_INT( depth );		// depth for volume textures
		RECORD_INT( ImageLoader::ImageFormatToD3DFormat( FindNearestSupportedFormat(dstImageFormat)) );
		RECORD_INT( numMipLevels );
		RECORD_INT( isCubeMap );
		RECORD_INT( numCopies <= 1 ? 1 : numCopies );
		RECORD_INT( isRenderTarget ? 1 : 0 );
		RECORD_INT( managed );
		RECORD_INT( isDepthBuffer ? 1 : 0 );
		RECORD_INT( isDynamic ? 1 : 0 );

		IDirect3DBaseTexture* pD3DTex;

		// Set the initial texture state
		if ( numCopies <= 1 )
		{
			tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - CreateD3DTexture", __FUNCTION__ );

			pTexture->m_NumCopies = 1;
			pD3DTex = CreateD3DTexture( width, height, depth, dstImageFormat, numMipLevels, creationFlags, (char*)pDebugName );
			pTexture->SetTexture( pD3DTex );
		}
		else
		{
			tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - CreateD3DTexture", __FUNCTION__ );

			pTexture->m_NumCopies = numCopies;
			{
// X360TEMP
//				MEM_ALLOC_CREDIT();
				pTexture->GetTextureArray() = new IDirect3DBaseTexture* [numCopies];
			}
			for (int k = 0; k < numCopies; ++k)
			{
				pD3DTex = CreateD3DTexture( width, height, depth, dstImageFormat, numMipLevels, creationFlags, (char*)pDebugName );
				pTexture->SetTexture( k, pD3DTex );
			}
		}
		pTexture->m_CurrentCopy = 0;

		pD3DTex = CShaderAPIDx8::GetD3DTexture( pHandles[ idxFrame ] );

#if defined( _X360 )
		if ( pD3DTex )
		{
			D3DSURFACE_DESC desc;
			HRESULT	hr;
			if ( creationFlags & TEXTURE_CREATE_CUBEMAP )
			{
				hr = ((IDirect3DCubeTexture *)pD3DTex)->GetLevelDesc( 0, &desc );
			}
			else
			{
				hr = ((IDirect3DTexture *)pD3DTex)->GetLevelDesc( 0, &desc );
			}
			Assert( !FAILED( hr ) );

			// for proper info get the actual format because the input format may have been redirected
			dstImageFormat = ImageLoader::D3DFormatToImageFormat( desc.Format );
			Assert( dstImageFormat != IMAGE_FORMAT_UNKNOWN );

			// track linear or tiled
			if ( !XGIsTiledFormat( desc.Format ) )
			{
				pTexture->m_Flags |= Texture_t::IS_LINEAR;
			}
		}
#endif

		pTexture->SetImageFormat( dstImageFormat );
		pTexture->m_UTexWrap = D3DTADDRESS_CLAMP;
		pTexture->m_VTexWrap = D3DTADDRESS_CLAMP;
		pTexture->m_WTexWrap = D3DTADDRESS_CLAMP;

		if ( isRenderTarget )
		{
#if !defined( _X360 )
			if ( ( dstImageFormat == IMAGE_FORMAT_NV_INTZ   ) || ( dstImageFormat == IMAGE_FORMAT_NV_RAWZ   ) || 
				 ( dstImageFormat == IMAGE_FORMAT_ATI_DST16 ) || ( dstImageFormat == IMAGE_FORMAT_ATI_DST24 ) )
			{
				pTexture->m_MinFilter = pTexture->m_MagFilter = D3DTEXF_POINT;
			}
			else
#endif
			{
				pTexture->m_MinFilter = pTexture->m_MagFilter = D3DTEXF_LINEAR;
			}

			pTexture->m_NumLevels = 1;
			pTexture->m_MipFilter = D3DTEXF_NONE;
		}
		else
		{
			pTexture->m_NumLevels = pD3DTex ? pD3DTex->GetLevelCount() : 1;
			pTexture->m_MipFilter = (pTexture->m_NumLevels != 1) ? D3DTEXF_LINEAR : D3DTEXF_NONE;
			pTexture->m_MinFilter = pTexture->m_MagFilter = D3DTEXF_LINEAR;
		}
		pTexture->m_SwitchNeeded = false;
		
		ComputeStatsInfo( pHandles[idxFrame], isCubeMap, (depth > 1) );
		SetupTextureGroup( pHandles[idxFrame], pTextureGroupName );
	}
}

void CShaderAPIDx8::SetupTextureGroup( ShaderAPITextureHandle_t hTexture, const char *pTextureGroupName )
{
	Texture_t *pTexture = &GetTexture( hTexture );

	Assert( !pTexture->m_pTextureGroupCounterGlobal );
	
	// Setup the texture group stuff.
	if ( pTextureGroupName && pTextureGroupName[0] != 0 )
	{
		pTexture->m_TextureGroupName = pTextureGroupName;
	}
	else
	{
		pTexture->m_TextureGroupName = TEXTURE_GROUP_UNACCOUNTED;
	}

	// 360 cannot vprof due to multicore loading until vprof is reentrant and these counters are real.
#if defined( VPROF_ENABLED ) && !defined( _X360 )
	char counterName[256];
	Q_snprintf( counterName, sizeof( counterName ), "TexGroup_global_%s", pTexture->m_TextureGroupName.String() );
	pTexture->m_pTextureGroupCounterGlobal = g_VProfCurrentProfile.FindOrCreateCounter( counterName, COUNTER_GROUP_TEXTURE_GLOBAL );

	Q_snprintf( counterName, sizeof( counterName ), "TexGroup_frame_%s", pTexture->m_TextureGroupName.String() );
	pTexture->m_pTextureGroupCounterFrame = g_VProfCurrentProfile.FindOrCreateCounter( counterName, COUNTER_GROUP_TEXTURE_PER_FRAME );
#else
	pTexture->m_pTextureGroupCounterGlobal = NULL;
	pTexture->m_pTextureGroupCounterFrame  = NULL;
#endif

	if ( pTexture->m_pTextureGroupCounterGlobal )
	{
		*pTexture->m_pTextureGroupCounterGlobal += pTexture->GetMemUsage();
	}
}

//-----------------------------------------------------------------------------
// Deletes a texture...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DeleteD3DTexture( ShaderAPITextureHandle_t hTexture )
{
	int numDeallocated = 0;
	Texture_t &texture = GetTexture( hTexture );

	if ( texture.m_Flags & Texture_t::IS_DEPTH_STENCIL )
	{
		// garymcthack - need to make sure that playback knows how to deal with these.
		RECORD_COMMAND( DX8_DESTROY_DEPTH_TEXTURE, 1 );
		RECORD_INT( hTexture );

		if ( texture.GetDepthStencilSurface() )
		{
			int nRetVal = texture.GetDepthStencilSurface()->Release();
			Assert( nRetVal == 0 );
			texture.GetDepthStencilSurface() = 0;
			numDeallocated = 1;
		}
		else
		{
			// FIXME: we hit this on shutdown of HLMV on some machines
			Assert( 0 );
		}
	}
	else if ( texture.m_NumCopies == 1 )
	{
		if ( texture.GetTexture() )
		{
			RECORD_COMMAND( DX8_DESTROY_TEXTURE, 1 );
			RECORD_INT( hTexture );

			DestroyD3DTexture( texture.GetTexture() );
			texture.SetTexture( 0 );
			numDeallocated = 1;
		}
	}
	else
	{
		if ( texture.GetTextureArray() )
		{
			RECORD_COMMAND( DX8_DESTROY_TEXTURE, 1 );
			RECORD_INT( hTexture );

			// Multiple copy texture
			for (int j = 0; j < texture.m_NumCopies; ++j)
			{
				if (texture.GetTexture( j ))
				{
					DestroyD3DTexture( texture.GetTexture( j ) );
					texture.SetTexture( j, 0 );
					++numDeallocated;
				}
			}

			delete [] texture.GetTextureArray();
			texture.GetTextureArray() = 0;
		}
	}

	texture.m_NumCopies = 0;

	// Remove this texture from its global texture group counter.
	if ( texture.m_pTextureGroupCounterGlobal )
	{
		*texture.m_pTextureGroupCounterGlobal -= texture.GetMemUsage();
		Assert( *texture.m_pTextureGroupCounterGlobal >= 0 );
		texture.m_pTextureGroupCounterGlobal = NULL;
	}

	// remove this texture from std textures
	for( int i=0 ; i < ARRAYSIZE( m_StdTextureHandles ) ; i++ )
	{
		if ( m_StdTextureHandles[i] == hTexture )
			m_StdTextureHandles[i] = INVALID_SHADERAPI_TEXTURE_HANDLE;
	}

}


//-----------------------------------------------------------------------------
// Unbinds a texture from all texture stages
//-----------------------------------------------------------------------------
void CShaderAPIDx8::UnbindTexture( ShaderAPITextureHandle_t hTexture )
{
	// Make sure no texture units are currently bound to it...
	for ( int unit = 0; unit < g_pHardwareConfig->GetSamplerCount(); ++unit )
	{
		if ( hTexture == SamplerState( unit ).m_BoundTexture )
		{
			// Gotta set this here because INVALID_SHADERAPI_TEXTURE_HANDLE means don't actually
			// set bound texture (it's used for disabling texturemapping)
			SamplerState( unit ).m_BoundTexture = INVALID_SHADERAPI_TEXTURE_HANDLE;
			SetTextureState( (Sampler_t)unit, INVALID_SHADERAPI_TEXTURE_HANDLE );
		}
	}

	int nVertexTextureCount = g_pHardwareConfig->GetVertexTextureCount();
	for ( int nSampler = 0; nSampler < nVertexTextureCount; ++nSampler )
	{
		if ( hTexture == m_DynamicState.m_VertexTextureState[ nSampler ].m_BoundTexture )
		{
			// Gotta set this here because INVALID_SHADERAPI_TEXTURE_HANDLE means don't actually
			// set bound texture (it's used for disabling texturemapping)
			BindVertexTexture( (VertexTextureSampler_t)nSampler, INVALID_SHADERAPI_TEXTURE_HANDLE );
		}
	}
}


//-----------------------------------------------------------------------------
// Deletes a texture...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::DeleteTexture( ShaderAPITextureHandle_t textureHandle )
{
	LOCK_SHADERAPI();
	AssertValidTextureHandle( textureHandle );

	if ( !TextureIsAllocated( textureHandle ) )
	{
		// already deallocated
		return;
	}
	
	// Unbind it!
	UnbindTexture( textureHandle );

	// Delete it baby
	DeleteD3DTexture( textureHandle );

	// Now remove the texture from the list
	// Mark as deallocated so that it can be reused.
	GetTexture( textureHandle ).m_Flags = 0;
}


void CShaderAPIDx8::WriteTextureToFile( ShaderAPITextureHandle_t hTexture, const char *szFileName )
{
	Texture_t *pTexInt = &GetTexture( hTexture );
	//Assert( pTexInt->IsCubeMap() == false );
	//Assert( pTexInt->IsVolumeTexture() == false );
	IDirect3DTexture *pD3DTexture = (IDirect3DTexture *)pTexInt->GetTexture();

	// Get the level of the texture we want to read from
	IDirect3DSurface* pTextureLevel;
	HRESULT hr = pD3DTexture ->GetSurfaceLevel( 0, &pTextureLevel );
	if ( FAILED( hr ) )
		return;

	D3DSURFACE_DESC surfaceDesc;
	pD3DTexture->GetLevelDesc( 0, &surfaceDesc );

	D3DLOCKED_RECT	lockedRect;

	
	//if( pTexInt->m_Flags & Texture_t::IS_RENDER_TARGET )
#if !defined( _X360 ) //TODO: x360 version
	{
		//render targets can't be locked, luckily we can copy the surface to system memory and lock that.
		IDirect3DSurface *pSystemSurface;

		Assert( !IsX360() );

		hr = Dx9Device()->CreateOffscreenPlainSurface( surfaceDesc.Width, surfaceDesc.Height, surfaceDesc.Format, D3DPOOL_SYSTEMMEM, &pSystemSurface, NULL );
		Assert( SUCCEEDED( hr ) );

		pSystemSurface->GetDesc( &surfaceDesc );

		hr = Dx9Device()->GetRenderTargetData( pTextureLevel, pSystemSurface );
		Assert( SUCCEEDED( hr ) );

		//pretend this is the texture level we originally grabbed with GetSurfaceLevel() and continue on
		pTextureLevel->Release();
		pTextureLevel = pSystemSurface;
	}
#endif

	// lock the region
	if ( FAILED( pTextureLevel->LockRect( &lockedRect, NULL, D3DLOCK_READONLY ) ) )
	{
		Assert( 0 );
		pTextureLevel->Release();
		return;
	}

	TGAWriter::WriteTGAFile( szFileName, surfaceDesc.Width, surfaceDesc.Height, pTexInt->GetImageFormat(), (const uint8 *)lockedRect.pBits, lockedRect.Pitch );

	if ( FAILED( pTextureLevel->UnlockRect() ) ) 
	{
		Assert( 0 );
		pTextureLevel->Release();
		return;
	}

	pTextureLevel->Release();
}


//-----------------------------------------------------------------------------
// Releases all textures
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReleaseAllTextures()
{
	ClearStdTextureHandles();
	ShaderAPITextureHandle_t hTexture;
	for ( hTexture = m_Textures.Head(); hTexture != m_Textures.InvalidIndex(); hTexture = m_Textures.Next( hTexture ) )
	{
		if ( TextureIsAllocated( hTexture ) )
		{
			// Delete it baby
			DeleteD3DTexture( hTexture );
		}
	}

	// Make sure all texture units are pointing to nothing
	for (int unit = 0; unit < g_pHardwareConfig->GetSamplerCount(); ++unit )
	{
		SamplerState( unit ).m_BoundTexture = INVALID_SHADERAPI_TEXTURE_HANDLE;
		SetTextureState( (Sampler_t)unit, INVALID_SHADERAPI_TEXTURE_HANDLE );
	}
}

void CShaderAPIDx8::DeleteAllTextures()
{
	ReleaseAllTextures();
	m_Textures.Purge();
}

bool CShaderAPIDx8::IsTexture( ShaderAPITextureHandle_t textureHandle )
{
	LOCK_SHADERAPI();

	if ( !TextureIsAllocated( textureHandle ) )
	{
		return false;
	}

#if !defined( _X360 )
	if ( GetTexture( textureHandle ).m_Flags & Texture_t::IS_DEPTH_STENCIL )
	{
		return GetTexture( textureHandle ).GetDepthStencilSurface() != 0;
	}
	else if ( ( GetTexture( textureHandle ).m_NumCopies == 1 && GetTexture( textureHandle ).GetTexture() != 0 ) ||
				( GetTexture( textureHandle ).m_NumCopies > 1 && GetTexture( textureHandle ).GetTexture( 0 ) != 0 ) )
	{
		return true;
	}
	else
	{
		return false;
	}
#else
	// query is about texture handle validity, not presence
	// texture handle is allocated, texture may or may not be present
	return true;
#endif
}



//-----------------------------------------------------------------------------
// Gets the surface associated with a texture (refcount of surface is increased)
//-----------------------------------------------------------------------------
IDirect3DSurface* CShaderAPIDx8::GetTextureSurface( ShaderAPITextureHandle_t textureHandle )
{
	MEM_ALLOC_D3D_CREDIT();

	IDirect3DSurface* pSurface;

	// We'll be modifying this sucka
	AssertValidTextureHandle( textureHandle );
	Texture_t &tex = GetTexture( textureHandle );
	if ( !( tex.m_Flags & Texture_t::IS_ALLOCATED ) )
	{
		return NULL;
	}

	if ( IsX360() && ( tex.m_Flags & Texture_t::IS_RENDER_TARGET_SURFACE ) )
	{
		pSurface = tex.GetRenderTargetSurface( false );

#if POSIX
		// dxabstract's AddRef/Release have optional args to help track usage
		pSurface->AddRef( 0, "CShaderAPIDx8::GetTextureSurface public addref");
#else
		pSurface->AddRef();
#endif

		return pSurface;
	}

	IDirect3DBaseTexture* pD3DTex = CShaderAPIDx8::GetD3DTexture( textureHandle );
	IDirect3DTexture* pTex = static_cast<IDirect3DTexture*>( pD3DTex );
	Assert( pTex );
	if ( !pTex )
	{
		return NULL;
	}
	
	HRESULT hr = pTex->GetSurfaceLevel( 0, &pSurface );
	Assert( hr == D3D_OK );

	return pSurface;
}

//-----------------------------------------------------------------------------
// Gets the surface associated with a texture (refcount of surface is increased)
//-----------------------------------------------------------------------------
IDirect3DSurface* CShaderAPIDx8::GetDepthTextureSurface( ShaderAPITextureHandle_t textureHandle )
{
	AssertValidTextureHandle( textureHandle );
	if ( !TextureIsAllocated( textureHandle ) )
	{
		return NULL;
	}
	return GetTexture( textureHandle ).GetDepthStencilSurface();
}

//-----------------------------------------------------------------------------
// Changes the render target
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetRenderTargetEx( int nRenderTargetID, ShaderAPITextureHandle_t colorTextureHandle, ShaderAPITextureHandle_t depthTextureHandle )
{
	LOCK_SHADERAPI();
	if ( IsDeactivated( ) )
	{
		return;
	}

	// GR - need to flush batched geometry
	FlushBufferedPrimitives();

#if defined( PIX_INSTRUMENTATION )
	{
		const char *pRT = "Backbuffer";
		const char *pDS = "DefaultDepthStencil";

		if ( colorTextureHandle == SHADER_RENDERTARGET_NONE )
		{
			pRT = "None";
		}
		else if ( colorTextureHandle != SHADER_RENDERTARGET_BACKBUFFER )
		{
			Texture_t &tex = GetTexture( colorTextureHandle );
			pRT = tex.m_DebugName.String();
		}

		if ( depthTextureHandle == SHADER_RENDERTARGET_NONE )
		{
			pDS = "None";
		}
		else if ( depthTextureHandle != SHADER_RENDERTARGET_DEPTHBUFFER )
		{
			Texture_t &tex = GetTexture( depthTextureHandle );
			pDS = tex.m_DebugName.String();
		}

		char buf[256];
		sprintf( buf, "SRT:%s %s", pRT ? pRT : "?", pDS ? pDS : "?" );
		BeginPIXEvent( 0xFFFFFFFF, buf );
		EndPIXEvent();
	}
#endif

#if !defined( _X360 )
	RECORD_COMMAND( DX8_TEST_COOPERATIVE_LEVEL, 0 );
	HRESULT hr = Dx9Device()->TestCooperativeLevel();
	if ( hr != D3D_OK )
	{
		MarkDeviceLost();
		return;
	}
#endif

	IDirect3DSurface* pColorSurface = NULL;
	IDirect3DSurface* pZSurface = NULL;

	RECORD_COMMAND( DX8_SET_RENDER_TARGET, 3 );
	RECORD_INT( nRenderTargetID );
	RECORD_INT( colorTextureHandle );
	RECORD_INT( depthTextureHandle );

	// The 0th render target defines which depth buffer we are using, so 
	// don't bother if we are another render target
	if ( nRenderTargetID > 0 )
	{
		depthTextureHandle = SHADER_RENDERTARGET_NONE;
	}

	// NOTE!!!!  If this code changes, also change Dx8SetRenderTarget in playback.cpp
	bool usingTextureTarget = false;
	if ( colorTextureHandle == SHADER_RENDERTARGET_BACKBUFFER )
	{
		pColorSurface = m_pBackBufferSurface;

#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
		if( pColorSurface )
#endif
		{
			// This is just to make the code a little simpler...
			// (simplifies the release logic)
#if POSIX
			// dxabstract's AddRef/Release have optional args to help track usage
			pColorSurface->AddRef( 0, "+C  CShaderAPIDx8::SetRenderTargetEx public addref 1");
#else
			pColorSurface->AddRef();
#endif
		}
	}
	else
	{
		// get the texture (Refcount increases)
		UnbindTexture( colorTextureHandle );
		pColorSurface = GetTextureSurface( colorTextureHandle );
		if ( !pColorSurface )
		{
			return;
		}

		usingTextureTarget = true;
	}

	if ( depthTextureHandle == SHADER_RENDERTARGET_DEPTHBUFFER )
	{
		// using the default depth buffer
		pZSurface = m_pZBufferSurface;

#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
		if( pZSurface )
#endif
		{
			// simplify the prologue logic
#if POSIX
			// dxabstract's AddRef/Release have optional args to help track usage
			pZSurface->AddRef( 0, "+D  CShaderAPIDx8::SetRenderTargetEx public addref 1");
#else
			pZSurface->AddRef();
#endif
		}
	}
	else if ( depthTextureHandle == SHADER_RENDERTARGET_NONE )
	{
		// GR - disable depth buffer
		pZSurface = NULL;
	}
	else
	{
		UnbindTexture( depthTextureHandle );

		Texture_t &tex = GetTexture( depthTextureHandle );

		//Cannot use a depth/stencil surface derived from a texture. 
		//Asserting helps get the whole call stack instead of letting the 360 report an error with a partial stack
		Assert( !( IsX360() && (tex.m_Flags & Texture_t::IS_DEPTH_STENCIL_TEXTURE) ) );

		if ( tex.m_Flags & Texture_t::IS_DEPTH_STENCIL )
		{
			pZSurface = GetDepthTextureSurface( depthTextureHandle );
			if ( pZSurface )
			{	
#if POSIX
				// dxabstract's AddRef/Release have optional args to help track usage
				pZSurface->AddRef( 0, "+D CShaderAPIDx8::SetRenderTargetEx public addref 2");
#else
				pZSurface->AddRef();
#endif
			}
		}
		else
		{	
			HRESULT hr = ((IDirect3DTexture9*)tex.GetTexture())->GetSurfaceLevel( 0, &pZSurface );
		}

		if ( !pZSurface )
		{
			// Refcount of color surface was increased above
#if POSIX
			// dxabstract's AddRef/Release have optional args to help track usage
			pColorSurface->Release( 0, "-C  CShaderAPIDx8::SetRenderTargetEx public release 1" );
#else
			pColorSurface->Release();
#endif
			return;
		}
		usingTextureTarget = true;
	}

#ifdef _DEBUG
	if ( pZSurface )
	{
		D3DSURFACE_DESC zSurfaceDesc, colorSurfaceDesc;
		pZSurface->GetDesc( &zSurfaceDesc );
		pColorSurface->GetDesc( &colorSurfaceDesc );

		if ( !HushAsserts() )
		{
			Assert( colorSurfaceDesc.Width <= zSurfaceDesc.Width );
			Assert( colorSurfaceDesc.Height <= zSurfaceDesc.Height );
		}
	}
#endif

	// we only set this flag for the 0th render target so that NULL
	// render targets 1-3 don't mess with the viewport on the main RT
	if( nRenderTargetID == 0 )
		m_UsingTextureRenderTarget = usingTextureTarget;

	// NOTE: The documentation says that SetRenderTarget increases the refcount
	// but it doesn't appear to in practice. If this somehow changes (perhaps
	// in a device-specific manner, we're in trouble).
	if ( IsPC() || !IsX360() )
	{
		if ( pColorSurface == m_pBackBufferSurface && nRenderTargetID > 0 )
		{
			// SetRenderTargetEx is overloaded so that if you pass NULL in for anything that
			// isn't the zeroth render target, you effectively disable that MRT index.
			// (Passing in NULL for the zeroth render target means that you want to use the backbuffer
			// as the render target.)
			// hack hack hack!!!!!  If the render target id > 0 and the user passed in NULL, disable the render target
			Dx9Device()->SetRenderTarget( nRenderTargetID, NULL );
		}
		else
		{
			Dx9Device()->SetRenderTarget( nRenderTargetID, pColorSurface );
		}
	}
	else
	{
		Assert( nRenderTargetID == 0 );
		SetRenderTargetInternalXbox( colorTextureHandle );
	}

	// The 0th render target defines which depth buffer we are using, so 
	// don't bother if we are another render target
	if ( nRenderTargetID == 0 )
	{
		Dx9Device()->SetDepthStencilSurface( pZSurface );
	}

	// The 0th render target defines the viewport, therefore it also defines
	// the viewport limits.
	if ( m_UsingTextureRenderTarget && nRenderTargetID == 0 )
	{
		D3DSURFACE_DESC  desc;
		HRESULT hr;
		if ( !pZSurface )
		{
			hr = pColorSurface->GetDesc( &desc );
		}
		else
		{
			hr = pZSurface->GetDesc( &desc );
		}
		Assert( !FAILED(hr) );
		m_ViewportMaxWidth = desc.Width;
		m_ViewportMaxHeight = desc.Height;
	}

	static bool assert_on_refzero = false;
	int ref;
	if ( pZSurface )
	{
#if POSIX
		ref = pZSurface->Release( 0, "-D  CShaderAPIDx8::SetRenderTargetEx public release (z surface)");
#else
		ref = pZSurface->Release();
#endif

		if(assert_on_refzero)
		{
			Assert( ref != 0 );
		}
	}

#ifdef ENABLE_NULLREF_DEVICE_SUPPORT
	if( pColorSurface )
#endif
	{
#if POSIX
		ref = pColorSurface->Release( 0, "-C  CShaderAPIDx8::SetRenderTargetEx public release (color surface)");
#else
		ref = pColorSurface->Release();
#endif
		if(assert_on_refzero)
		{
			Assert( ref != 0 );
		}
	}

	// Changing the render target sets a default viewport.  Force rewrite to preserve the current desired state.
	m_DynamicState.m_Viewport.X = 0;
	m_DynamicState.m_Viewport.Y = 0;
	m_DynamicState.m_Viewport.Width = (DWORD)-1;
	m_DynamicState.m_Viewport.Height = (DWORD)-1;

	ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_ALWAYS, CommitSetViewports );
}


//-----------------------------------------------------------------------------
// Changes the render target
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetRenderTarget( ShaderAPITextureHandle_t colorTextureHandle, ShaderAPITextureHandle_t depthTextureHandle )
{
	LOCK_SHADERAPI();
	SetRenderTargetEx( 0, colorTextureHandle, depthTextureHandle );
}

//-----------------------------------------------------------------------------
// Returns the nearest supported format
//-----------------------------------------------------------------------------
ImageFormat CShaderAPIDx8::GetNearestSupportedFormat( ImageFormat fmt, bool bFilteringRequired /* = true */ ) const
{
	return FindNearestSupportedFormat( fmt, false, false, bFilteringRequired );
}


ImageFormat CShaderAPIDx8::GetNearestRenderTargetFormat( ImageFormat fmt ) const
{
	return FindNearestSupportedFormat( fmt, false, true, false );
}


bool CShaderAPIDx8::DoRenderTargetsNeedSeparateDepthBuffer() const
{
	LOCK_SHADERAPI();
	return m_PresentParameters.MultiSampleType != D3DMULTISAMPLE_NONE;
}


//-----------------------------------------------------------------------------
// Indicates we're modifying a texture
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ModifyTexture( ShaderAPITextureHandle_t textureHandle )
{
	tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "%s", __FUNCTION__ );

	LOCK_SHADERAPI();
	// Can't do this if we're locked!
	Assert( m_ModifyTextureLockedLevel < 0 );

	AssertValidTextureHandle( textureHandle );
	m_ModifyTextureHandle = textureHandle;
	
	// If we're got a multi-copy texture, we need to up the current copy count
	Texture_t& tex = GetTexture( textureHandle );
	if (tex.m_NumCopies > 1)
	{
		// Each time we modify a texture, we'll want to switch texture
		// as soon as a TexImage2D call is made...
		tex.m_SwitchNeeded = true;
	}
}


//-----------------------------------------------------------------------------
// Advances the current copy of a texture...
//-----------------------------------------------------------------------------
void CShaderAPIDx8::AdvanceCurrentCopy( ShaderAPITextureHandle_t hTexture )
{
	// May need to switch textures....
	Texture_t& tex = GetTexture( hTexture );
	if (tex.m_NumCopies > 1)
	{
		if (++tex.m_CurrentCopy >= tex.m_NumCopies)
			tex.m_CurrentCopy = 0;

		// When the current copy changes, we need to make sure this texture
		// isn't bound to any stages any more; thereby guaranteeing the new
		// copy will be re-bound.
		UnbindTexture( hTexture );
	}
}


//-----------------------------------------------------------------------------
// Locks, unlocks the current texture
//-----------------------------------------------------------------------------

bool CShaderAPIDx8::TexLock( int level, int cubeFaceID, int xOffset, int yOffset, 
								int width, int height, CPixelWriter& writer )
{
	tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "%s", __FUNCTION__ );

	LOCK_SHADERAPI();

	Assert( m_ModifyTextureLockedLevel < 0 );

	ShaderAPITextureHandle_t hTexture = GetModifyTextureHandle();
	if ( !m_Textures.IsValidIndex( hTexture ) )
		return false;

	// Blow off mip levels if we don't support mipmapping
	if ( !g_pHardwareConfig->SupportsMipmapping() && ( level > 0 ) )
		return false;

	// This test here just makes sure we don't try to download mipmap levels
	// if we weren't able to create them in the first place
	Texture_t& tex = GetTexture( hTexture );
	if ( level >= tex.m_NumLevels  )
	{
		return false;
	}

	// May need to switch textures....
	if ( tex.m_SwitchNeeded )
	{
		AdvanceCurrentCopy( hTexture );
		tex.m_SwitchNeeded = false;
	}

	IDirect3DBaseTexture *pTexture = GetModifyTexture();
#if defined( _X360 )
	// 360 can't lock a bound texture
	if ( pTexture->IsSet( Dx9Device() ) )
	{
		UnbindTexture( hTexture );
	}
#endif

	bool bOK = LockTexture( hTexture, tex.m_CurrentCopy, pTexture,
		level, (D3DCUBEMAP_FACES)cubeFaceID, xOffset, yOffset, width, height, false, writer );
	if ( bOK )
	{
		m_ModifyTextureLockedLevel = level;
		m_ModifyTextureLockedFace = cubeFaceID;
	}
	return bOK;
}

void CShaderAPIDx8::TexUnlock( )
{
	tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "%s", __FUNCTION__ );

	LOCK_SHADERAPI();
	if ( m_ModifyTextureLockedLevel >= 0 )
	{
		Texture_t& tex = GetTexture( GetModifyTextureHandle() );
		UnlockTexture( GetModifyTextureHandle(), tex.m_CurrentCopy, GetModifyTexture(),
			m_ModifyTextureLockedLevel, (D3DCUBEMAP_FACES)m_ModifyTextureLockedFace );

		m_ModifyTextureLockedLevel = -1;
	}
}

//-----------------------------------------------------------------------------
// Texture image upload
//-----------------------------------------------------------------------------
void CShaderAPIDx8::TexImage2D( 
	int			level,
	int			cubeFaceID, 
	ImageFormat	dstFormat, 
	int			z, 
	int			width, 
	int			height, 
	ImageFormat srcFormat, 
	bool		bSrcIsTiled,
	void		*pSrcData )
{
	LOCK_SHADERAPI();

	Assert( pSrcData );
	AssertValidTextureHandle( GetModifyTextureHandle() );

	if ( !m_Textures.IsValidIndex( GetModifyTextureHandle() ) )
	{
		return;
	}

	Assert( (width <= g_pHardwareConfig->Caps().m_MaxTextureWidth) && (height <= g_pHardwareConfig->Caps().m_MaxTextureHeight) );

	// Blow off mip levels if we don't support mipmapping
	if ( !g_pHardwareConfig->SupportsMipmapping() && (level > 0))
	{
		return;
	}

	// This test here just makes sure we don't try to download mipmap levels
	// if we weren't able to create them in the first place
	Texture_t& tex = GetTexture( GetModifyTextureHandle() );
	if ( level >= tex.m_NumLevels )
	{
		return;
	}

	// May need to switch textures....
	if (tex.m_SwitchNeeded)
	{
		AdvanceCurrentCopy( GetModifyTextureHandle() );
		tex.m_SwitchNeeded = false;
	}

	TextureLoadInfo_t info;
	info.m_TextureHandle = GetModifyTextureHandle();
	info.m_pTexture = GetModifyTexture();
	info.m_nLevel = level;
	info.m_nCopy = tex.m_CurrentCopy;
	info.m_CubeFaceID = (D3DCUBEMAP_FACES)cubeFaceID;
	info.m_nWidth = width;
	info.m_nHeight = height;
	info.m_nZOffset = z;
	info.m_SrcFormat = srcFormat;
	info.m_pSrcData = (unsigned char *)pSrcData;
#if defined( _X360 )
	info.m_bSrcIsTiled = bSrcIsTiled;
	info.m_bCanConvertFormat = ( tex.m_Flags & Texture_t::CAN_CONVERT_FORMAT ) != 0;
#else
	info.m_bTextureIsLockable = ( tex.m_Flags & Texture_t::IS_LOCKABLE ) != 0;
#endif
	LoadTexture( info );
	SetModifyTexture( info.m_pTexture );
}

//-----------------------------------------------------------------------------
// Upload to a sub-piece of a texture
//-----------------------------------------------------------------------------
void CShaderAPIDx8::TexSubImage2D( 
	int			level, 
	int			cubeFaceID,
	int			xOffset,
	int			yOffset, 
	int			zOffset,
	int			width,
	int			height,
	ImageFormat srcFormat,
	int			srcStride,
	bool		bSrcIsTiled,
	void		*pSrcData )
{
	LOCK_SHADERAPI();

	Assert( pSrcData );
	AssertValidTextureHandle( GetModifyTextureHandle() );

	if ( !m_Textures.IsValidIndex( GetModifyTextureHandle() ) )
	{
		return;
	}

	// Blow off mip levels if we don't support mipmapping
	if ( !g_pHardwareConfig->SupportsMipmapping() && ( level > 0 ) )
	{
		return;
	}

	// NOTE: This can only be done with procedural textures if this method is
	// being used to download the entire texture, cause last frame's partial update
	// may be in a completely different texture! Sadly, I don't have all of the
	// information I need, but I can at least check a couple things....
#ifdef _DEBUG
	if ( GetTexture( GetModifyTextureHandle() ).m_NumCopies > 1 )
	{
		Assert( (xOffset == 0) && (yOffset == 0) );
	}
#endif

	// This test here just makes sure we don't try to download mipmap levels
	// if we weren't able to create them in the first place
	Texture_t& tex = GetTexture( GetModifyTextureHandle() );
	if ( level >= tex.m_NumLevels )
	{
		return;
	}

	// May need to switch textures....
	if ( tex.m_SwitchNeeded )
	{
		AdvanceCurrentCopy( GetModifyTextureHandle() );
		tex.m_SwitchNeeded = false;
	}

	TextureLoadInfo_t info;
	info.m_TextureHandle = GetModifyTextureHandle();
	info.m_pTexture = GetModifyTexture();
	info.m_nLevel = level;
	info.m_nCopy = tex.m_CurrentCopy;
	info.m_CubeFaceID = (D3DCUBEMAP_FACES)cubeFaceID;
	info.m_nWidth = width;
	info.m_nHeight = height;
	info.m_nZOffset = zOffset;
	info.m_SrcFormat = srcFormat;
	info.m_pSrcData = (unsigned char *)pSrcData;
#if defined( _X360 )
	info.m_bSrcIsTiled = bSrcIsTiled;
	info.m_bCanConvertFormat = ( tex.m_Flags & Texture_t::CAN_CONVERT_FORMAT ) != 0;
#else
	info.m_bTextureIsLockable = ( tex.m_Flags & Texture_t::IS_LOCKABLE ) != 0;
#endif
	LoadSubTexture( info, xOffset, yOffset, srcStride );
}

//-----------------------------------------------------------------------------
// Volume texture upload
//-----------------------------------------------------------------------------
void CShaderAPIDx8::TexImageFromVTF( IVTFTexture *pVTF, int iVTFFrame )
{
	LOCK_SHADERAPI();
	Assert( pVTF );
	AssertValidTextureHandle( GetModifyTextureHandle() );
	if ( !m_Textures.IsValidIndex( GetModifyTextureHandle() ) )
	{
		return;
	}
	Texture_t& tex = GetTexture( GetModifyTextureHandle() );

	// May need to switch textures....
	if (tex.m_SwitchNeeded)
	{
		AdvanceCurrentCopy( GetModifyTextureHandle() );
		tex.m_SwitchNeeded = false;
	}

	TextureLoadInfo_t info;
	info.m_TextureHandle = GetModifyTextureHandle();
	info.m_pTexture = GetModifyTexture();
	info.m_nLevel = 0;
	info.m_nCopy = tex.m_CurrentCopy;
	info.m_CubeFaceID = (D3DCUBEMAP_FACES)0;
	info.m_nWidth = 0;
	info.m_nHeight = 0;
	info.m_nZOffset = 0;
	info.m_SrcFormat = pVTF->Format();
	info.m_pSrcData = NULL;
#if defined( _X360 )
	info.m_bSrcIsTiled = pVTF->IsPreTiled();
	info.m_bCanConvertFormat = ( tex.m_Flags & Texture_t::CAN_CONVERT_FORMAT ) != 0;
#else
	info.m_bTextureIsLockable = ( tex.m_Flags & Texture_t::IS_LOCKABLE ) != 0;
#endif
	if ( pVTF->Depth() > 1 )
	{
		LoadVolumeTextureFromVTF( info, pVTF, iVTFFrame );
	}
	else if ( pVTF->IsCubeMap() )
	{
		if ( HardwareConfig()->SupportsCubeMaps() )
		{
			LoadCubeTextureFromVTF( info, pVTF, iVTFFrame );
		}
		else
		{
			info.m_CubeFaceID = (D3DCUBEMAP_FACES)6;
			LoadTextureFromVTF( info, pVTF, iVTFFrame );
		}
	}
	else
	{
		LoadTextureFromVTF( info, pVTF, iVTFFrame );
	}
	SetModifyTexture( info.m_pTexture );
}


//-----------------------------------------------------------------------------
// Is the texture resident?
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsTextureResident( ShaderAPITextureHandle_t textureHandle )
{
	return true;
}


//-----------------------------------------------------------------------------
// Level of anisotropic filtering
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetAnisotropicLevel( int nAnisotropyLevel )
{
	LOCK_SHADERAPI();

	// NOTE: This must be called before the rest of the code in this function so
	//       anisotropic can be set per-texture to force it on! This will also avoid
	//       a possible infinite loop that existed before.
	g_pShaderUtil->NoteAnisotropicLevel( nAnisotropyLevel );

	// Never set this to 1. In the case we want it set to 1, we will use this to override
	//   aniso per-texture, so set it to something reasonable
	if ( nAnisotropyLevel > g_pHardwareConfig->Caps().m_nMaxAnisotropy || nAnisotropyLevel <= 1 )
	{
		// Set it to 1/4 the max but between 2-8
		nAnisotropyLevel = max( 2, min( 8, ( g_pHardwareConfig->Caps().m_nMaxAnisotropy / 4 ) ) );
	}

	// Set the D3D max insotropy state for all samplers
	for ( int i = 0; i < g_pHardwareConfig->Caps().m_NumSamplers; ++i)
	{
		SamplerState(i).m_nAnisotropicLevel = nAnisotropyLevel;
		SetSamplerState( i, D3DSAMP_MAXANISOTROPY, SamplerState(i).m_nAnisotropicLevel );
	}
}


//-----------------------------------------------------------------------------
// Sets the priority
//-----------------------------------------------------------------------------
void CShaderAPIDx8::TexSetPriority( int priority )
{
#if !defined( _X360 )
	LOCK_SHADERAPI();

	// A hint to the cacher...
	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	Texture_t& tex = GetTexture( hModifyTexture );
	if ( tex.m_NumCopies > 1 )
	{
		for (int i = 0; i < tex.m_NumCopies; ++i)
			tex.GetTexture( i )->SetPriority( priority );
	}
	else
	{
		tex.GetTexture()->SetPriority( priority );
	}
#endif
}

void CShaderAPIDx8::TexLodClamp( int finest )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	Texture_t& tex = GetTexture( hModifyTexture );
	tex.m_FinestMipmapLevel = finest;
}

void CShaderAPIDx8::TexLodBias( float bias )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	Texture_t& tex = GetTexture( hModifyTexture );
	tex.m_LodBias = bias;
}



//-----------------------------------------------------------------------------
// Texturemapping state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::TexWrap( ShaderTexCoordComponent_t coord, ShaderTexWrapMode_t wrapMode )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	D3DTEXTUREADDRESS address;
	switch( wrapMode )
	{
	case SHADER_TEXWRAPMODE_CLAMP:
		address = D3DTADDRESS_CLAMP;
		break;
	case SHADER_TEXWRAPMODE_REPEAT:
		address = D3DTADDRESS_WRAP;
		break; 
	case SHADER_TEXWRAPMODE_BORDER:
		address = D3DTADDRESS_BORDER;
		break; 
	default:
		address = D3DTADDRESS_CLAMP;
		Warning( "CShaderAPIDx8::TexWrap: unknown wrapMode\n" );
		break;
	}

	switch( coord )
	{
	case SHADER_TEXCOORD_S:
		GetTexture( hModifyTexture ).m_UTexWrap = address;
		break;
	case SHADER_TEXCOORD_T:
		GetTexture( hModifyTexture ).m_VTexWrap = address;
		break;
	case SHADER_TEXCOORD_U:
		GetTexture( hModifyTexture ).m_WTexWrap = address;
		break;
	default:
		Warning( "CShaderAPIDx8::TexWrap: unknown coord\n" );
		break;
	}
}

void CShaderAPIDx8::TexMinFilter( ShaderTexFilterMode_t texFilterMode )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	switch( texFilterMode )
	{
	case SHADER_TEXFILTERMODE_NEAREST:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_POINT;
		GetTexture( hModifyTexture ).m_MipFilter = D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_LINEAR:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_LINEAR;
		GetTexture( hModifyTexture ).m_MipFilter = D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_NEAREST_MIPMAP_NEAREST:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_POINT;
		GetTexture( hModifyTexture ).m_MipFilter = 
			GetTexture( hModifyTexture ).m_NumLevels != 1 ? D3DTEXF_POINT : D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_LINEAR_MIPMAP_NEAREST:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_LINEAR;
		GetTexture( hModifyTexture ).m_MipFilter = 
			GetTexture( hModifyTexture ).m_NumLevels != 1 ? D3DTEXF_POINT : D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_NEAREST_MIPMAP_LINEAR:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_POINT;
		GetTexture( hModifyTexture ).m_MipFilter = 
			GetTexture( hModifyTexture ).m_NumLevels != 1 ? D3DTEXF_LINEAR : D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_LINEAR_MIPMAP_LINEAR:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_LINEAR;
		GetTexture( hModifyTexture ).m_MipFilter = 
			GetTexture( hModifyTexture ).m_NumLevels != 1 ? D3DTEXF_LINEAR : D3DTEXF_NONE;
		break;
	case SHADER_TEXFILTERMODE_ANISOTROPIC:
		GetTexture( hModifyTexture ).m_MinFilter = D3DTEXF_ANISOTROPIC;
		GetTexture( hModifyTexture ).m_MipFilter = 
			GetTexture( hModifyTexture ).m_NumLevels != 1 ? D3DTEXF_LINEAR : D3DTEXF_NONE;
		break;
	default:
		Warning( "CShaderAPIDx8::TexMinFilter: Unknown texFilterMode\n" );
		break;
	}
}

void CShaderAPIDx8::TexMagFilter( ShaderTexFilterMode_t texFilterMode )
{
	LOCK_SHADERAPI();

	ShaderAPITextureHandle_t hModifyTexture = GetModifyTextureHandle();
	if ( hModifyTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
		return;

	switch( texFilterMode )
	{
	case SHADER_TEXFILTERMODE_NEAREST:
		GetTexture( hModifyTexture ).m_MagFilter = D3DTEXF_POINT;
		break;
	case SHADER_TEXFILTERMODE_LINEAR:
		GetTexture( hModifyTexture ).m_MagFilter = D3DTEXF_LINEAR;
		break;
	case SHADER_TEXFILTERMODE_NEAREST_MIPMAP_NEAREST:
		Warning( "CShaderAPIDx8::TexMagFilter: SHADER_TEXFILTERMODE_NEAREST_MIPMAP_NEAREST is invalid\n" );
		break;
	case SHADER_TEXFILTERMODE_LINEAR_MIPMAP_NEAREST:
		Warning( "CShaderAPIDx8::TexMagFilter: SHADER_TEXFILTERMODE_LINEAR_MIPMAP_NEAREST is invalid\n" );
		break;
	case SHADER_TEXFILTERMODE_NEAREST_MIPMAP_LINEAR:
		Warning( "CShaderAPIDx8::TexMagFilter: SHADER_TEXFILTERMODE_NEAREST_MIPMAP_LINEAR is invalid\n" );
		break;
	case SHADER_TEXFILTERMODE_LINEAR_MIPMAP_LINEAR:
		Warning( "CShaderAPIDx8::TexMagFilter: SHADER_TEXFILTERMODE_LINEAR_MIPMAP_LINEAR is invalid\n" );
		break;
	case SHADER_TEXFILTERMODE_ANISOTROPIC:
		GetTexture( hModifyTexture ).m_MagFilter = g_pHardwareConfig->Caps().m_bSupportsMagAnisotropicFiltering ? D3DTEXF_ANISOTROPIC : D3DTEXF_LINEAR;
		break;
	default:
		Warning( "CShaderAPIDx8::TexMAGFilter: Unknown texFilterMode\n" );
		break;
	}
}


//-----------------------------------------------------------------------------
// Gets the matrix stack from the matrix mode
//-----------------------------------------------------------------------------

int CShaderAPIDx8::GetMatrixStack( MaterialMatrixMode_t mode ) const
{
	Assert( mode >= 0 && mode < NUM_MATRIX_MODES );
	return mode;
}

//-----------------------------------------------------------------------------
// Returns true if we're modulating constant color into the vertex color
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::IsModulatingVertexColor() const
{
	return m_TransitionTable.CurrentShadowShaderState()->m_ModulateConstantColor;
}


//-----------------------------------------------------------------------------
// Material property (used to deal with overbright for lights)
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetDefaultMaterial()
{
#if !defined( _X360 )
	D3DMATERIAL mat;
	mat.Diffuse.r = mat.Diffuse.g = mat.Diffuse.b = mat.Diffuse.a = 1.0f;
	mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = mat.Ambient.a = 0.0f;
	mat.Specular.r = mat.Specular.g = mat.Specular.b = mat.Specular.a = 0.0f;
	mat.Emissive.r = mat.Emissive.g = mat.Emissive.b = mat.Emissive.a = 0.0f;
	mat.Power = 1.0f;
	Dx9Device()->SetMaterial( &mat );
#endif
}

//-----------------------------------------------------------------------------
// lighting related methods
//-----------------------------------------------------------------------------

void CShaderAPIDx8::SetAmbientLight( float r, float g, float b )
{
	LOCK_SHADERAPI();
	unsigned int ambient = D3DCOLOR_ARGB( 255, (int)(r * 255), 
		(int)(g * 255), (int)(b * 255) );
	if (ambient != m_DynamicState.m_Ambient)
	{
		m_DynamicState.m_Ambient = ambient;
		SetSupportedRenderState( D3DRS_AMBIENT, ambient );
	}
}

void CShaderAPIDx8::SetLightingOrigin( Vector vLightingOrigin )
{
	if ( vLightingOrigin != m_DynamicState.m_vLightingOrigin )
	{
		FlushBufferedPrimitives();
		m_DynamicState.m_vLightingOrigin = vLightingOrigin;
	}
}

//#define NO_LOCAL_LIGHTS
void CShaderAPIDx8::SetLight( int lightNum, const LightDesc_t& desc_ )
{
	LOCK_SHADERAPI();
#ifdef NO_LOCAL_LIGHTS
	LightDesc_t desc = desc_;
	desc.m_Type = MATERIAL_LIGHT_DISABLE;
#else
	LightDesc_t &desc = const_cast<LightDesc_t &>(desc_); // to permit '&'
#endif
	Assert( lightNum < g_pHardwareConfig->Caps().m_MaxNumLights && lightNum >= 0 );

	if( lightNum >= g_pHardwareConfig->Caps().m_MaxNumLights || lightNum < 0 )
		return;
	
	m_DynamicState.m_LightDescs[lightNum] = desc;
	
	FlushBufferedPrimitives();

	if (desc.m_Type == MATERIAL_LIGHT_DISABLE)
	{
		if (m_DynamicState.m_LightEnable[lightNum])
		{
			m_DynamicState.m_LightEnableChanged[lightNum] = STATE_CHANGED;
			m_DynamicState.m_LightEnable[lightNum] = false;
		}
		return;
	}

	if (!m_DynamicState.m_LightEnable[lightNum])
	{
		m_DynamicState.m_LightEnableChanged[lightNum] = STATE_CHANGED;
		m_DynamicState.m_LightEnable[lightNum] = true;
	}

	D3DLIGHT light;
	switch( desc.m_Type )
	{
	case MATERIAL_LIGHT_POINT:
		light.Type = D3DLIGHT_POINT;
		light.Range = desc.m_Range;
		break;

	case MATERIAL_LIGHT_DIRECTIONAL:
		light.Type = D3DLIGHT_DIRECTIONAL;
		light.Range = 1e12;	// This is supposed to be ignored
		break;

	case MATERIAL_LIGHT_SPOT:
		light.Type = D3DLIGHT_SPOT;
		light.Range = desc.m_Range;
		break;

	default:
		m_DynamicState.m_LightEnable[lightNum] = false;
		return;
	}

	// This is a D3D limitation
	Assert( (light.Range >= 0) && (light.Range <= sqrt(FLT_MAX)) );

	memcpy( &light.Diffuse, &desc.m_Color[0], 3*sizeof(float) );
	memcpy( &light.Specular, &desc.m_Color[0], 3*sizeof(float) );
	light.Diffuse.a = 1.0f;
	light.Specular.a = 1.0f;
	light.Ambient.a = light.Ambient.b = light.Ambient.g = light.Ambient.r = 0;
	memcpy( &light.Position, &desc.m_Position[0], 3 * sizeof(float) );
	memcpy( &light.Direction, &desc.m_Direction[0], 3 * sizeof(float) );
	light.Falloff = desc.m_Falloff;
	light.Attenuation0 = desc.m_Attenuation0;
	light.Attenuation1 = desc.m_Attenuation1;
	light.Attenuation2 = desc.m_Attenuation2;

	// normalize light color...
	light.Theta = desc.m_Theta;
	light.Phi = desc.m_Phi;
	if (light.Phi > M_PI)
		light.Phi = M_PI;

	// This piece of crap line of code is because if theta gets too close to phi,
	// we get no light at all.
	if (light.Theta - light.Phi > -1e-3)
		light.Theta = light.Phi - 1e-3;

	m_DynamicState.m_LightChanged[lightNum] = STATE_CHANGED;
	memcpy( &m_DynamicState.m_Lights[lightNum], &light, sizeof(light) );
}

void CShaderAPIDx8::DisableAllLocalLights()
{
	LOCK_SHADERAPI();
	bool bFlushed = false;
	for ( int lightNum = 0; lightNum < MAX_NUM_LIGHTS; lightNum++ )
	{
		if (m_DynamicState.m_LightEnable[lightNum])
		{
			if ( !bFlushed )
			{
				FlushBufferedPrimitives();
				bFlushed = true;
			}
			m_DynamicState.m_LightDescs[lightNum].m_Type = MATERIAL_LIGHT_DISABLE;
			m_DynamicState.m_LightEnableChanged[lightNum] = STATE_CHANGED;
			m_DynamicState.m_LightEnable[lightNum] = false;
		}
	}
}

int CShaderAPIDx8::GetMaxLights( void ) const
{
	return g_pHardwareConfig->Caps().m_MaxNumLights;
}

const LightDesc_t& CShaderAPIDx8::GetLight( int lightNum ) const
{
	Assert( lightNum < g_pHardwareConfig->Caps().m_MaxNumLights && lightNum >= 0 );
	return m_DynamicState.m_LightDescs[lightNum];
}

//-----------------------------------------------------------------------------
// Ambient cube 
//-----------------------------------------------------------------------------

//#define NO_AMBIENT_CUBE 1
void CShaderAPIDx8::SetAmbientLightCube( Vector4D cube[6] )
{
	LOCK_SHADERAPI();
/*
	int i;
	for( i = 0; i < 6; i++ )
	{
		ColorClamp( cube[i].AsVector3D() );
//		if( i == 0 )
//		{
//			Warning( "%d: %f %f %f\n", i, cube[i][0], cube[i][1], cube[i][2] );
//		}
	}
*/
	if (memcmp(&m_DynamicState.m_AmbientLightCube[0][0], cube, 6 * sizeof(Vector4D)))
	{
		memcpy( &m_DynamicState.m_AmbientLightCube[0][0], cube, 6 * sizeof(Vector4D) );

#ifdef NO_AMBIENT_CUBE
		memset( &m_DynamicState.m_AmbientLightCube[0][0], 0, 6 * sizeof(Vector4D) );
#endif

//#define DEBUG_AMBIENT_CUBE

#ifdef DEBUG_AMBIENT_CUBE
		m_DynamicState.m_AmbientLightCube[0][0] = 1.0f;
		m_DynamicState.m_AmbientLightCube[0][1] = 0.0f;
		m_DynamicState.m_AmbientLightCube[0][2] = 0.0f;

		m_DynamicState.m_AmbientLightCube[1][0] = 0.0f;
		m_DynamicState.m_AmbientLightCube[1][1] = 1.0f;
		m_DynamicState.m_AmbientLightCube[1][2] = 0.0f;

		m_DynamicState.m_AmbientLightCube[2][0] = 0.0f;
		m_DynamicState.m_AmbientLightCube[2][1] = 0.0f;
		m_DynamicState.m_AmbientLightCube[2][2] = 1.0f;

		m_DynamicState.m_AmbientLightCube[3][0] = 1.0f;
		m_DynamicState.m_AmbientLightCube[3][1] = 0.0f;
		m_DynamicState.m_AmbientLightCube[3][2] = 1.0f;

		m_DynamicState.m_AmbientLightCube[4][0] = 1.0f;
		m_DynamicState.m_AmbientLightCube[4][1] = 1.0f;
		m_DynamicState.m_AmbientLightCube[4][2] = 0.0f;

		m_DynamicState.m_AmbientLightCube[5][0] = 0.0f;
		m_DynamicState.m_AmbientLightCube[5][1] = 1.0f;
		m_DynamicState.m_AmbientLightCube[5][2] = 1.0f;
#endif

		m_CachedAmbientLightCube = STATE_CHANGED;
	}
}

void CShaderAPIDx8::SetVertexShaderStateAmbientLightCube()
{
	if (m_CachedAmbientLightCube & STATE_CHANGED_VERTEX_SHADER)
	{
		SetVertexShaderConstant( VERTEX_SHADER_AMBIENT_LIGHT, m_DynamicState.m_AmbientLightCube[0].Base(), 6 );
		m_CachedAmbientLightCube &= ~STATE_CHANGED_VERTEX_SHADER;
	}
}


void CShaderAPIDx8::SetPixelShaderStateAmbientLightCube( int pshReg, bool bForceToBlack )
{
	float *pCubeBase;
	Vector4D tempCube[6];

	if( bForceToBlack )
	{
		for ( int i=0; i<6 ; i++ )
			tempCube[i].Init();
		
		pCubeBase = tempCube[0].Base();
	}
	else
	{
		pCubeBase = m_DynamicState.m_AmbientLightCube[0].Base();
	}

	SetPixelShaderConstant( pshReg, pCubeBase, 6 );
}

float CShaderAPIDx8::GetAmbientLightCubeLuminance( void )
{
	Vector4D vLuminance( 0.3f, 0.59f, 0.11f, 0.0f );
	float fLuminance = 0.0f;

	for (int i=0; i<6; i++)
	{
		fLuminance += vLuminance.Dot( m_DynamicState.m_AmbientLightCube[i] );
	}

	return fLuminance / 6.0f;
}

static inline RECT* RectToRECT( Rect_t *pSrcRect, RECT &dstRect )
{
	if ( !pSrcRect )
		return NULL;

	dstRect.left = pSrcRect->x;
	dstRect.top = pSrcRect->y;
	dstRect.right = pSrcRect->x + pSrcRect->width;
	dstRect.bottom = pSrcRect->y + pSrcRect->height;
	return &dstRect;
}

void CShaderAPIDx8::CopyRenderTargetToTextureEx( ShaderAPITextureHandle_t textureHandle, int nRenderTargetID, Rect_t *pSrcRect, Rect_t *pDstRect )
{
	LOCK_SHADERAPI();
	VPROF_BUDGET( "CShaderAPIDx8::CopyRenderTargetToTexture", "Refraction overhead" );

	if ( !TextureIsAllocated( textureHandle ) )
		return;

#if defined( PIX_INSTRUMENTATION )
	{
		const char *pRT = ( nRenderTargetID < 0 ) ? "DS" : "RT";

		if ( textureHandle == SHADER_RENDERTARGET_NONE )
		{
			pRT = "None";
		}
		else if ( textureHandle != SHADER_RENDERTARGET_BACKBUFFER )
		{
			Texture_t &tex = GetTexture( textureHandle );
			pRT = tex.m_DebugName.String();
		}

		char buf[256];
		sprintf( buf, "CopyRTToTexture:%s", pRT ? pRT : "?" );
		BeginPIXEvent( 0xFFFFFFFF, buf );
		EndPIXEvent();
	}
#endif

	// Don't flush here!!  If you have to flush here, then there is a driver bug.
	// FlushHardware( );

	AssertValidTextureHandle( textureHandle );
	Texture_t *pTexture = &GetTexture( textureHandle );
	Assert( pTexture );
	IDirect3DTexture *pD3DTexture = (IDirect3DTexture *)pTexture->GetTexture();
	Assert( pD3DTexture );

#if !defined( _X360 )
	IDirect3DSurface* pRenderTargetSurface;
	HRESULT hr = Dx9Device()->GetRenderTarget( nRenderTargetID, &pRenderTargetSurface );
	if ( FAILED( hr ) )
	{
		Assert( 0 );
		return;
	}

	IDirect3DSurface *pDstSurf;
	hr = pD3DTexture->GetSurfaceLevel( 0, &pDstSurf );
	Assert( !FAILED( hr ) );
	if ( FAILED( hr ) )
	{
		pRenderTargetSurface->Release();
		return;
	}

	bool tryblit = true;
	if ( tryblit )
	{
		RECORD_COMMAND( DX8_COPY_FRAMEBUFFER_TO_TEXTURE, 1 );
		RECORD_INT( textureHandle );
		
		RECT srcRect, dstRect;
		hr = Dx9Device()->StretchRect( pRenderTargetSurface, RectToRECT( pSrcRect, srcRect ),
			pDstSurf, RectToRECT( pDstRect, dstRect ), D3DTEXF_LINEAR );
		Assert( !FAILED( hr ) );
	}

	pDstSurf->Release();
	pRenderTargetSurface->Release();
#else
	DWORD flags = 0;
	switch( nRenderTargetID )
	{
	case -1:
		flags = D3DRESOLVE_DEPTHSTENCIL | D3DRESOLVE_FRAGMENT0;
		break;
	case 0:
		flags = D3DRESOLVE_RENDERTARGET0;
		break;
	case 1:
	case 2:
	case 3:
		// not supporting MRT
		Assert( 0 );
		return;
	NO_DEFAULT
	};

	// not prepared to handle mip mapping yet
	Assert( pD3DTexture->GetLevelCount() == 1 ); 

	D3DPOINT dstPoint = { 0 };
	if ( pDstRect )
	{
		dstPoint.x = pDstRect->x;
		dstPoint.y = pDstRect->y;
	}

	int destWidth, destHeight;
	if( pDstRect )
	{
		destWidth = pDstRect->width;
		destHeight = pDstRect->height;

		Assert( (destWidth <= pTexture->GetWidth()) && (destHeight <= pTexture->GetHeight()) );
	}
	else
	{
		destWidth = pTexture->GetWidth();
		destHeight = pTexture->GetHeight();
	}	

	RECT srcRect;
	RECT *pResolveRect = NULL;
	int srcWidth, srcHeight;
	if ( pSrcRect )
	{
		RectToRECT( pSrcRect, srcRect );
		pResolveRect = &srcRect;

		// resolve has no stretching ability, and we can only compensate when doing a resolve to a whole texture larger than the source
		Assert( !pDstRect || ( pSrcRect->width <= pDstRect->width && pSrcRect->height <= pDstRect->height ) );

		srcWidth = pSrcRect->width;
		srcHeight = pSrcRect->height;
	}
	else
	{
		srcRect.left = srcRect.top = 0;
		srcRect.right = m_DynamicState.m_Viewport.Width;
		srcRect.bottom = m_DynamicState.m_Viewport.Height;
		if( (srcRect.right < 0) || (srcRect.bottom < 0) )
		{
			if( m_UsingTextureRenderTarget )
			{
				srcRect.right = m_ViewportMaxWidth;
				srcRect.bottom = m_ViewportMaxHeight;
			}
			else
			{
				int w,h;
				GetBackBufferDimensions( w, h );
				srcRect.right = w;
				srcRect.bottom = h;
			}
		}
		srcWidth = srcRect.right;
		srcHeight = srcRect.bottom;
	}

	if( (srcWidth != destWidth) || (srcHeight != destHeight) )
	{
		//Not a 1:1 resolve, we should only have gotten this far if we can downsize the target texture to compensate
		Assert( (destWidth > srcWidth) && (destHeight > srcHeight) && (dstPoint.x == 0) && (dstPoint.y == 0) );

		//What we're doing is telling D3D that this texture is smaller than it is so the resolve is 1:1.
		//We leave the texture in this state until it resolves from something bigger.
		//All outside code still thinks this texture is it's original size. And it still owns enough memory to go back to it's original size.
		pD3DTexture->Format.Size.TwoD.Width = srcWidth - 1;
		pD3DTexture->Format.Size.TwoD.Height = srcHeight - 1; //no idea why they store it as size-1, but they do
		pResolveRect = NULL;
	}
	else
	{
		//restore D3D texture to full size in case it was previously downsized
		pD3DTexture->Format.Size.TwoD.Width = pTexture->GetWidth() - 1;
		pD3DTexture->Format.Size.TwoD.Height = pTexture->GetHeight() - 1; //no idea why they store it as size-1, but they do
	}

	// if we convert to srgb format, we need the original format for reverting. We only need the first DWORD of GPUTEXTURE_FETCH_CONSTANT.
	DWORD linearFormatBackup = pD3DTexture->Format.dword[0]; 
	if ( !( flags & D3DRESOLVE_DEPTHSTENCIL ) && ( m_DynamicState.m_bSRGBWritesEnabled ) )
	{
		// we need a matched resolve regarding sRGB to get values transfered as-is
		// when the surface is sRGB, use the corresponding sRGB texture
		pD3DTexture->Format.SignX = pD3DTexture->Format.SignY = pD3DTexture->Format.SignZ = 3;
	}

	HRESULT hr = Dx9Device()->Resolve( flags, (D3DRECT*)pResolveRect, pD3DTexture, &dstPoint, 0, 0, NULL, 0, 0,	NULL );
	Assert( !FAILED( hr ) );

	pD3DTexture->Format.dword[0] = linearFormatBackup;
#endif
}

void CShaderAPIDx8::CopyRenderTargetToScratchTexture( ShaderAPITextureHandle_t srcRt, ShaderAPITextureHandle_t dstTex, Rect_t *pSrcRect, Rect_t *pDstRect )
{
	LOCK_SHADERAPI();

	if  ( !TextureIsAllocated( srcRt ) || !TextureIsAllocated( dstTex ) )
	{
		Assert( !"Fix that render target or dest texture aren't allocated." );
		return;
	}

	HRESULT hr = D3D_OK;

	IDirect3DSurface9* srcSurf = NULL;
	AssertValidTextureHandle( srcRt );
	Texture_t *pSrcRt = &GetTexture( srcRt );
	Assert( pSrcRt );
	IDirect3DTexture *pD3DSrcRt = ( IDirect3DTexture * ) pSrcRt->GetTexture();
	Assert( pD3DSrcRt );
	hr = pD3DSrcRt->GetSurfaceLevel( 0, &srcSurf );
	Assert( SUCCEEDED( hr ) && srcSurf );

	IDirect3DSurface9* dstSurf = NULL;
	AssertValidTextureHandle( dstTex );
	Texture_t *pDstTex = &GetTexture( dstTex );
	Assert( pDstTex );
	IDirect3DTexture *pD3DDstTex = ( IDirect3DTexture * ) pDstTex->GetTexture();
	Assert( pD3DDstTex );
	hr = pD3DDstTex->GetSurfaceLevel( 0, &dstSurf );
	Assert( SUCCEEDED( hr ) && dstSurf );

	// This does it.
	hr = Dx9Device()->GetRenderTargetData( srcSurf, dstSurf );
	Assert( SUCCEEDED( hr ) );

	srcSurf->Release();
	dstSurf->Release();
}

//-------------------------------------------------------------------------
// Allows locking and unlocking of very specific surface types. pOutBits and pOutPitch will not be touched if 
// the lock fails.
//-------------------------------------------------------------------------
void CShaderAPIDx8::LockRect( void** pOutBits, int* pOutPitch, ShaderAPITextureHandle_t texHandle, int mipmap, int x, int y, int w, int h, bool bWrite, bool bRead )
{
	LOCK_SHADERAPI();

	Assert( pOutBits );
	Assert( pOutPitch );

	if ( !TextureIsAllocated( texHandle ) )
	{
		Assert( !"Fix that texture isn't allocated." );
		return;
	}

	HRESULT hr = D3D_OK;
	IDirect3DSurface9* surf = NULL;
	AssertValidTextureHandle( texHandle );
	Texture_t *pTex = &GetTexture( texHandle );
	Assert( pTex );
	IDirect3DTexture *pD3DTex = ( IDirect3DTexture * ) pTex->GetTexture();
	Assert( pD3DTex );

	hr = pD3DTex->GetSurfaceLevel( mipmap, &surf );
	Assert( SUCCEEDED( hr ) && surf );

	D3DLOCKED_RECT lockRect = { 0 };
	RECT srcRect = { x, y, w, h };
	DWORD flags = 0;

	if ( !bRead && !bWrite )
	{
		Assert( !"Asking to neither read nor write? Probably a caller bug." );
		goto cleanup;
	}

	if (  bRead && !bWrite )
		flags = D3DLOCK_READONLY;

	hr = surf->LockRect( &lockRect, &srcRect, flags );
	if ( FAILED( hr ) )
	{
		Assert( !"Lock failed, look into why." );
		goto cleanup;
	}

	(*pOutBits)  = lockRect.pBits;
	(*pOutPitch) = lockRect.Pitch;

cleanup:
	surf->Release();
}

void CShaderAPIDx8::UnlockRect( ShaderAPITextureHandle_t texHandle, int mipmap )
{
	LOCK_SHADERAPI();

	if ( !TextureIsAllocated( texHandle ) )
	{
		Assert( !"Fix that texture isn't allocated." );
		return;
	}

	HRESULT hr = D3D_OK;
	IDirect3DSurface9* surf = NULL;
	AssertValidTextureHandle( texHandle );
	Texture_t *pTex = &GetTexture( texHandle );
	Assert( pTex );
	IDirect3DTexture *pD3DTex = ( IDirect3DTexture * ) pTex->GetTexture();
	Assert( pD3DTex );

	hr = pD3DTex->GetSurfaceLevel( mipmap, &surf );
	Assert( SUCCEEDED( hr ) && surf );

	hr = surf->UnlockRect();
	Assert( SUCCEEDED( hr ) );

	surf->Release();
}

static float GetAspectRatio( const Texture_t* pTex )
{
	Assert( pTex );
	if ( pTex->m_Height != 0 )
		return float( pTex->m_Width ) / float( pTex->m_Height );

	Assert( !"Height of texture is 0, that seems like a bug." );
	return 0.0f;
}

struct TextureExtents_t
{
	int width;
	int height;
	int depth;
	int mipmaps;

	int fine;
	int coarse;

	TextureExtents_t() : width( 0 ), height( 0 ), depth( 0 ), mipmaps( 0 ), fine( 0 ), coarse( 0 ) { }
};

// Returns positive integer on success, 0 or <0 on failure.
static int FindCommonMipmapRange( int *pOutSrcFine, int *pOutDstFine, Texture_t *pSrcTex, Texture_t *pDstTex )
{
	Assert( pOutSrcFine && pOutDstFine );
	Assert( pSrcTex && pDstTex );

	if ( GetAspectRatio( pSrcTex ) != GetAspectRatio( pDstTex ) )
		return 0;

	TextureExtents_t src, 
		             dst;

	// LOD Clamp indicates that there's no actual data in the finer mipmap levels yet, so respect it when determining
	// the source and destination levels that could have data.
	const int srcLodClamp = pSrcTex->GetLodClamp();
	src.width =  Max( 1, pSrcTex->GetWidth()  >> srcLodClamp );
	src.height = Max( 1, pSrcTex->GetHeight() >> srcLodClamp );
	src.depth =  Max( 1, pSrcTex->GetDepth()  >> srcLodClamp );
	src.mipmaps = pSrcTex->m_NumLevels - srcLodClamp;
	Assert( src.mipmaps >= 1 );
	

	const int dstLodClamp = pDstTex->GetLodClamp();
	dst.width =  Max( 1, pDstTex->GetWidth()  >> dstLodClamp );
	dst.height = Max( 1, pDstTex->GetHeight() >> dstLodClamp );
	dst.depth =  Max( 1, pDstTex->GetDepth()  >> dstLodClamp );
	dst.mipmaps = pDstTex->m_NumLevels - dstLodClamp;
	Assert( dst.mipmaps >= 1 );

	TextureExtents_t *pLarger = NULL, 
		             *pSmaller = NULL;

	if ( src.width >= dst.width && src.height >= dst.height && src.depth >= dst.depth )
	{
		pLarger = &src;
		pSmaller = &dst;
	}
	else
	{
		pLarger = &dst;
		pSmaller = &src;
	}

	// Since we are same aspect ratio, only need to test one dimension
	while ( ( pLarger->width >> pLarger->fine ) > pSmaller->width )
	{
		++pLarger->fine;
		--pLarger->mipmaps;
	}

	( *pOutSrcFine ) = src.fine;
	( *pOutDstFine ) = dst.fine;

	return Min( src.mipmaps, dst.mipmaps );
}

void CShaderAPIDx8::CopyTextureToTexture( ShaderAPITextureHandle_t srcTex, ShaderAPITextureHandle_t dstTex )
{
	LOCK_SHADERAPI();

	AssertValidTextureHandle( srcTex );
	AssertValidTextureHandle( dstTex );

	Assert( TextureIsAllocated( srcTex ) && TextureIsAllocated( dstTex ) );

	Texture_t *pSrcTex = &GetTexture( srcTex );
	Texture_t *pDstTex = &GetTexture( dstTex );

	Assert( pSrcTex && pDstTex );

	// Must have same image format
	Assert( pSrcTex->GetImageFormat() == pDstTex->GetImageFormat() );

	int srcFine = 0,
		dstFine = 0;
	
	int mipmapCount = FindCommonMipmapRange( &srcFine, &dstFine, pSrcTex, pDstTex );
	
	if ( mipmapCount <= 0 )
	{
		// This is legit for things that are streamed in that are very small (near the 32x32 cutoff we do at the 
		// tip of the mipmap pyramid). But leaving it here because it's useful if you're tracking a specific bug.
		// Warning( "Attempted to copy textures that had non-overlapping mipmap pyramids. This has failed and no copy has taken place.\n" );
		return;
	}

	IDirect3DTexture* pSrcD3DTex = ( IDirect3DTexture * ) pSrcTex->GetTexture();
	IDirect3DTexture* pDstD3DTex = ( IDirect3DTexture * ) pDstTex->GetTexture();

	HRESULT hr = S_OK;
	for ( int i = 0; i < mipmapCount; ++i )
	{
		int srcMipmap = srcFine + i;
		int dstMipmap = dstFine + i;

		IDirect3DSurface9* pSrcSurf = NULL;
		IDirect3DSurface9* pDstSurf = NULL;

		hr = pSrcD3DTex->GetSurfaceLevel( srcMipmap, &pSrcSurf );
		Assert( SUCCEEDED( hr ) && pSrcSurf );

		hr = pDstD3DTex->GetSurfaceLevel( dstMipmap, &pDstSurf );
		Assert( SUCCEEDED( hr ) && pDstSurf );

		hr = g_pD3DDevice->StretchRect( pSrcSurf, NULL, pDstSurf, NULL, D3DTEXF_NONE );
		Assert( SUCCEEDED( hr ) );

		pSrcSurf->Release();
		pDstSurf->Release();
	}
}



void CShaderAPIDx8::CopyRenderTargetToTexture( ShaderAPITextureHandle_t textureHandle )
{
	LOCK_SHADERAPI();
	CopyRenderTargetToTextureEx( textureHandle, 0 );
}


void CShaderAPIDx8::CopyTextureToRenderTargetEx( int nRenderTargetID, ShaderAPITextureHandle_t textureHandle, Rect_t *pSrcRect, Rect_t *pDstRect )
{
	LOCK_SHADERAPI();
	VPROF( "CShaderAPIDx8::CopyRenderTargetToTexture" );

	if ( !TextureIsAllocated( textureHandle ) )
		return;

	// Don't flush here!!  If you have to flush here, then there is a driver bug.
	// FlushHardware( );

	AssertValidTextureHandle( textureHandle );
	Texture_t *pTexture = &GetTexture( textureHandle );
	Assert( pTexture );
	IDirect3DTexture *pD3DTexture = (IDirect3DTexture *)pTexture->GetTexture();
	Assert( pD3DTexture );

#if !defined( _X360 )
	IDirect3DSurface* pRenderTargetSurface;
	HRESULT hr = Dx9Device()->GetRenderTarget( nRenderTargetID, &pRenderTargetSurface );
	if ( FAILED( hr ) )
	{
		Assert( 0 );
		return;
	}

	IDirect3DSurface *pDstSurf;
	hr = pD3DTexture->GetSurfaceLevel( 0, &pDstSurf );
	Assert( !FAILED( hr ) );
	if ( FAILED( hr ) )
	{
		pRenderTargetSurface->Release();
		return;
	}

	bool tryblit = true;
	if ( tryblit )
	{
		RECORD_COMMAND( DX8_COPY_FRAMEBUFFER_TO_TEXTURE, 1 );
		RECORD_INT( textureHandle );

		RECT srcRect, dstRect;
		hr = Dx9Device()->StretchRect( pDstSurf, RectToRECT( pSrcRect, srcRect ),
			pRenderTargetSurface, RectToRECT( pDstRect, dstRect ), D3DTEXF_LINEAR );
		Assert( !FAILED( hr ) );
	}

	pDstSurf->Release();
	pRenderTargetSurface->Release();
#else
	Assert( 0 );
#endif
}


//-----------------------------------------------------------------------------
// modifies the vertex data when necessary
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ModifyVertexData( )
{
	// this should be a dead code path
	Assert( 0 );
#if 0
	// We have to modulate the vertex color by the constant color sometimes
	if (IsModulatingVertexColor())
	{
		m_ModifyBuilder.Reset();

		float factor[4];
		unsigned char* pColor = (unsigned char*)&m_DynamicState.m_ConstantColor;
		factor[0] = pColor[0] / 255.0f;
		factor[1] = pColor[1] / 255.0f;
		factor[2] = pColor[2] / 255.0f;
		factor[3] = pColor[3] / 255.0f;

		for ( int i = 0; i < m_ModifyBuilder.VertexCount(); ++i )
		{
			unsigned int color = m_ModifyBuilder.Color();
			unsigned char* pVertexColor = (unsigned char*)&color;

			pVertexColor[0] = (unsigned char)((float)pVertexColor[0] * factor[0]);
			pVertexColor[1] = (unsigned char)((float)pVertexColor[1] * factor[1]);
			pVertexColor[2] = (unsigned char)((float)pVertexColor[2] * factor[2]);
			pVertexColor[3] = (unsigned char)((float)pVertexColor[3] * factor[3]);
			m_ModifyBuilder.Color4ubv( pVertexColor );

			m_ModifyBuilder.AdvanceVertex();
		}
	}
#endif
}

static const char *TextureArgToString( int arg )
{
	static char buf[128];
	switch( arg & D3DTA_SELECTMASK )
	{
	case D3DTA_DIFFUSE:
		strcpy( buf, "D3DTA_DIFFUSE" );
		break;
	case D3DTA_CURRENT:
		strcpy( buf, "D3DTA_CURRENT" );
		break;
	case D3DTA_TEXTURE:
		strcpy( buf, "D3DTA_TEXTURE" );
		break;
	case D3DTA_TFACTOR:
		strcpy( buf, "D3DTA_TFACTOR" );
		break;
	case D3DTA_SPECULAR:
		strcpy( buf, "D3DTA_SPECULAR" );
		break;
	case D3DTA_TEMP:
		strcpy( buf, "D3DTA_TEMP" );
		break;
	default:
		strcpy( buf, "<ERROR>" );
		break;
	}

	if( arg & D3DTA_COMPLEMENT )
	{
		strcat( buf, "|D3DTA_COMPLEMENT" );
	}
	if( arg & D3DTA_ALPHAREPLICATE )
	{
		strcat( buf, "|D3DTA_ALPHAREPLICATE" );
	}
	return buf;
}

static const char *TextureOpToString( D3DTEXTUREOP op )
{
	switch( op )
	{
	case D3DTOP_DISABLE:
		return "D3DTOP_DISABLE";
    case D3DTOP_SELECTARG1:
		return "D3DTOP_SELECTARG1";
    case D3DTOP_SELECTARG2:
		return "D3DTOP_SELECTARG2";
    case D3DTOP_MODULATE:
		return "D3DTOP_MODULATE";
    case D3DTOP_MODULATE2X:
		return "D3DTOP_MODULATE2X";
    case D3DTOP_MODULATE4X:
		return "D3DTOP_MODULATE4X";
    case D3DTOP_ADD:
		return "D3DTOP_ADD";
    case D3DTOP_ADDSIGNED:
		return "D3DTOP_ADDSIGNED";
    case D3DTOP_ADDSIGNED2X:
		return "D3DTOP_ADDSIGNED2X";
    case D3DTOP_SUBTRACT:
		return "D3DTOP_SUBTRACT";
    case D3DTOP_ADDSMOOTH:
		return "D3DTOP_ADDSMOOTH";
    case D3DTOP_BLENDDIFFUSEALPHA:
		return "D3DTOP_BLENDDIFFUSEALPHA";
    case D3DTOP_BLENDTEXTUREALPHA:
		return "D3DTOP_BLENDTEXTUREALPHA";
    case D3DTOP_BLENDFACTORALPHA:
		return "D3DTOP_BLENDFACTORALPHA";
    case D3DTOP_BLENDTEXTUREALPHAPM:
		return "D3DTOP_BLENDTEXTUREALPHAPM";
    case D3DTOP_BLENDCURRENTALPHA:
		return "D3DTOP_BLENDCURRENTALPHA";
    case D3DTOP_PREMODULATE:
		return "D3DTOP_PREMODULATE";
    case D3DTOP_MODULATEALPHA_ADDCOLOR:
		return "D3DTOP_MODULATEALPHA_ADDCOLOR";
    case D3DTOP_MODULATECOLOR_ADDALPHA:
		return "D3DTOP_MODULATECOLOR_ADDALPHA";
    case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
		return "D3DTOP_MODULATEINVALPHA_ADDCOLOR";
    case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
		return "D3DTOP_MODULATEINVCOLOR_ADDALPHA";
    case D3DTOP_BUMPENVMAP:
		return "D3DTOP_BUMPENVMAP";
    case D3DTOP_BUMPENVMAPLUMINANCE:
		return "D3DTOP_BUMPENVMAPLUMINANCE";
    case D3DTOP_DOTPRODUCT3:
		return "D3DTOP_DOTPRODUCT3";
    case D3DTOP_MULTIPLYADD:
		return "D3DTOP_MULTIPLYADD";
    case D3DTOP_LERP:
		return "D3DTOP_LERP";
	default:
		return "<ERROR>";
	}
}

static const char *BlendModeToString( int blendMode )
{
	switch( blendMode )
	{
	case D3DBLEND_ZERO:
		return "D3DBLEND_ZERO";
    case D3DBLEND_ONE:
		return "D3DBLEND_ONE";
    case D3DBLEND_SRCCOLOR:
		return "D3DBLEND_SRCCOLOR";
    case D3DBLEND_INVSRCCOLOR:
		return "D3DBLEND_INVSRCCOLOR";
    case D3DBLEND_SRCALPHA:
		return "D3DBLEND_SRCALPHA";
    case D3DBLEND_INVSRCALPHA:
		return "D3DBLEND_INVSRCALPHA";
    case D3DBLEND_DESTALPHA:
		return "D3DBLEND_DESTALPHA";
    case D3DBLEND_INVDESTALPHA:
		return "D3DBLEND_INVDESTALPHA";
    case D3DBLEND_DESTCOLOR:
		return "D3DBLEND_DESTCOLOR";
    case D3DBLEND_INVDESTCOLOR:
		return "D3DBLEND_INVDESTCOLOR";
    case D3DBLEND_SRCALPHASAT:
		return "D3DBLEND_SRCALPHASAT";
#if !defined( _X360 )
	case D3DBLEND_BOTHSRCALPHA:
		return "D3DBLEND_BOTHSRCALPHA";
    case D3DBLEND_BOTHINVSRCALPHA:
		return "D3DBLEND_BOTHINVSRCALPHA";
#endif
	default:
		return "<ERROR>";
	}
}

//-----------------------------------------------------------------------------
// Spew Board State
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SpewBoardState()
{
	// FIXME: This has regressed
	return;
#ifdef DEBUG_BOARD_STATE
/*
	{
		static ID3DXFont* pFont = 0;
		if (!pFont)
		{
			HFONT hFont = CreateFont( 0, 0, 0, 0, FW_DONTCARE, FALSE, FALSE, FALSE,
				ANSI_CHARSET, OUT_DEFAULT_PRECIS, CLIP_DEFAULT_PRECIS, DEFAULT_QUALITY,
				DEFAULT_PITCH | FF_MODERN, 0 );
			Assert( hFont != 0 );

			HRESULT hr = D3DXCreateFont( Dx9Device(), hFont, &pFont );
		}

		static char buf[1024];
		static RECT r = { 0, 0, 640, 480 };

		if (m_DynamicState.m_VertexBlend == 0)
			return;
		
#if 1
		D3DXMATRIX* m = &GetTransform(MATERIAL_MODEL);
		D3DXMATRIX* m2 = &GetTransform(MATERIAL_MODEL + 1);
		sprintf(buf,"FVF %x\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n",
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n",
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n",
			ShaderManager()->GetCurrentVertexShader(),
			m->m[0][0],	m->m[0][1],	m->m[0][2],	m->m[0][3],	
			m->m[1][0],	m->m[1][1],	m->m[1][2],	m->m[1][3],	
			m->m[2][0],	m->m[2][1],	m->m[2][2],	m->m[2][3],	
			m->m[3][0], m->m[3][1], m->m[3][2], m->m[3][3],
			m2->m[0][0], m2->m[0][1], m2->m[0][2], m2->m[0][3],	
			m2->m[1][0], m2->m[1][1], m2->m[1][2], m2->m[1][3],	
			m2->m[2][0], m2->m[2][1], m2->m[2][2], m2->m[2][3],	
			m2->m[3][0], m2->m[3][1], m2->m[3][2], m2->m[3][3]
			 );
#else
		Vector4D *pVec2 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_MODELVIEWPROJ];
		Vector4D *pVec3 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_VIEWPROJ];
		Vector4D *pVec4 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_MODEL];

		sprintf(buf,"\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
			"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n",

			pVec1[0][0], pVec1[0][1], pVec1[0][2], pVec1[0][3],	
			pVec1[1][0], pVec1[1][1], pVec1[1][2], pVec1[1][3],	
			pVec1[2][0], pVec1[2][1], pVec1[2][2], pVec1[2][3],	
			pVec1[3][0], pVec1[3][1], pVec1[3][2], pVec1[3][3],

			pVec2[0][0], pVec2[0][1], pVec2[0][2], pVec2[0][3],	
			pVec2[1][0], pVec2[1][1], pVec2[1][2], pVec2[1][3],	
			pVec2[2][0], pVec2[2][1], pVec2[2][2], pVec2[2][3],	
			pVec2[3][0], pVec2[3][1], pVec2[3][2], pVec2[3][3],

			pVec3[0][0], pVec3[0][1], pVec3[0][2], pVec3[0][3],	
			pVec3[1][0], pVec3[1][1], pVec3[1][2], pVec3[1][3],	
			pVec3[2][0], pVec3[2][1], pVec3[2][2], pVec3[2][3],	
			pVec3[3][0], pVec3[3][1], pVec3[3][2], pVec3[3][3],

			pVec4[0][0], pVec4[0][1], pVec4[0][2], pVec4[0][3],	
			pVec4[1][0], pVec4[1][1], pVec4[1][2], pVec4[1][3],	
			pVec4[2][0], pVec4[2][1], pVec4[2][2], pVec4[2][3],	
			0, 0, 0, 1
			);
#endif
		pFont->Begin();
		pFont->DrawText( buf, -1, &r, DT_LEFT | DT_TOP,
			D3DCOLOR_RGBA( 255, 255, 255, 255 ) );
		pFont->End();

		return;
	}

#if 0
	Vector4D *pVec2 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_MODELVIEWPROJ];
	Vector4D *pVec3 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_VIEWPROJ];
	Vector4D *pVec4 = &m_DynamicState.m_pVectorVertexShaderConstant[VERTEX_SHADER_MODEL];

	static char buf2[1024];
	sprintf(buf2,"\n"
		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n\n"

		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n"
		"[%6.3f %6.3f %6.3f %6.3f]\n[%6.3f %6.3f %6.3f %6.3f]\n",

		pVec1[0][0], pVec1[0][1], pVec1[0][2], pVec1[0][3],	
		pVec1[1][0], pVec1[1][1], pVec1[1][2], pVec1[1][3],	
		pVec1[2][0], pVec1[2][1], pVec1[2][2], pVec1[2][3],	
		pVec1[3][0], pVec1[3][1], pVec1[3][2], pVec1[3][3],

		pVec2[0][0], pVec2[0][1], pVec2[0][2], pVec2[0][3],	
		pVec2[1][0], pVec2[1][1], pVec2[1][2], pVec2[1][3],	
		pVec2[2][0], pVec2[2][1], pVec2[2][2], pVec2[2][3],	
		pVec2[3][0], pVec2[3][1], pVec2[3][2], pVec2[3][3],

		pVec3[0][0], pVec3[0][1], pVec3[0][2], pVec3[0][3],	
		pVec3[1][0], pVec3[1][1], pVec3[1][2], pVec3[1][3],	
		pVec3[2][0], pVec3[2][1], pVec3[2][2], pVec3[2][3],	
		pVec3[3][0], pVec3[3][1], pVec3[3][2], pVec3[3][3],

		pVec4[0][0], pVec4[0][1], pVec4[0][2], pVec4[0][3],	
		pVec4[1][0], pVec4[1][1], pVec4[1][2], pVec4[1][3],	
		pVec4[2][0], pVec4[2][1], pVec4[2][2], pVec4[2][3],	
		0, 0, 0, 1.0f
		);
	Plat_DebugString(buf2);
	return;
#endif
*/

	char buf[256];
	sprintf(buf, "\nSnapshot id %d : \n", m_TransitionTable.CurrentSnapshot() );
	Plat_DebugString(buf);

	ShadowState_t &boardState = m_TransitionTable.BoardState();
	ShadowShaderState_t &boardShaderState = m_TransitionTable.BoardShaderState();

	sprintf(buf,"Depth States: ZFunc %d, ZWrite %d, ZEnable %d, ZBias %d\n",
		boardState.m_ZFunc, boardState.m_ZWriteEnable, 
		boardState.m_ZEnable, boardState.m_ZBias );
	Plat_DebugString(buf);
	sprintf(buf,"Cull Enable %d Cull Mode %d Color Write %d Fill %d Const Color Mod %d sRGBWriteEnable %d\n",
		boardState.m_CullEnable, m_DynamicState.m_CullMode, boardState.m_ColorWriteEnable, 
		boardState.m_FillMode, boardShaderState.m_ModulateConstantColor, boardState.m_SRGBWriteEnable );
	Plat_DebugString(buf);
	sprintf(buf,"Blend States: Blend Enable %d Test Enable %d Func %d SrcBlend %d (%s) DstBlend %d (%s)\n",
		boardState.m_AlphaBlendEnable, boardState.m_AlphaTestEnable, 
		boardState.m_AlphaFunc, boardState.m_SrcBlend, BlendModeToString( boardState.m_SrcBlend ),
		boardState.m_DestBlend, BlendModeToString( boardState.m_DestBlend ) );
	Plat_DebugString(buf);
	int len = sprintf(buf,"Alpha Ref %d, Lighting: %d, Ambient Color %x, LightsEnabled ",
		boardState.m_AlphaRef, boardState.m_Lighting, m_DynamicState.m_Ambient);

	int i;
	for ( i = 0; i < g_pHardwareConfig->Caps().m_MaxNumLights; ++i)
	{
		len += sprintf(buf+len,"%d ", m_DynamicState.m_LightEnable[i] );
	}
	sprintf(buf+len,"\n");
	Plat_DebugString(buf);
	sprintf(buf,"Fixed Function: %d, VertexBlend %d\n",
		boardState.m_UsingFixedFunction, m_DynamicState.m_VertexBlend );
	Plat_DebugString(buf);
	
	sprintf(buf,"Pass Vertex Usage: %llx Pixel Shader %p Vertex Shader %p\n",
		boardShaderState.m_VertexUsage, ShaderManager()->GetCurrentPixelShader(), 
		ShaderManager()->GetCurrentVertexShader() );
	Plat_DebugString(buf);

	// REGRESSED!!!!
	/*
	D3DXMATRIX* m = &GetTransform(MATERIAL_MODEL);
	sprintf(buf,"WorldMat [%4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f]\n",
		m->m[0][0],	m->m[0][1],	m->m[0][2],	m->m[0][3],	
		m->m[1][0],	m->m[1][1],	m->m[1][2],	m->m[1][3],	
		m->m[2][0],	m->m[2][1],	m->m[2][2],	m->m[2][3],	
		m->m[3][0], m->m[3][1], m->m[3][2], m->m[3][3] );
	Plat_DebugString(buf);

	m = &GetTransform(MATERIAL_MODEL + 1);
	sprintf(buf,"WorldMat2 [%4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f]\n",
		m->m[0][0],	m->m[0][1],	m->m[0][2],	m->m[0][3],	
		m->m[1][0],	m->m[1][1],	m->m[1][2],	m->m[1][3],	
		m->m[2][0],	m->m[2][1],	m->m[2][2],	m->m[2][3],	
		m->m[3][0], m->m[3][1], m->m[3][2], m->m[3][3] );
	Plat_DebugString(buf);

	m = &GetTransform(MATERIAL_VIEW);
	sprintf(buf,"ViewMat [%4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f]\n",
		m->m[0][0],	m->m[0][1],	m->m[0][2],	
		m->m[1][0],	m->m[1][1],	m->m[1][2],
		m->m[2][0],	m->m[2][1],	m->m[2][2],
		m->m[3][0], m->m[3][1], m->m[3][2] );
	Plat_DebugString(buf);

	m = &GetTransform(MATERIAL_PROJECTION);
	sprintf(buf,"ProjMat [%4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f]\n",
		m->m[0][0],	m->m[0][1],	m->m[0][2],	
		m->m[1][0],	m->m[1][1],	m->m[1][2],
		m->m[2][0],	m->m[2][1],	m->m[2][2],
		m->m[3][0], m->m[3][1], m->m[3][2] );
	Plat_DebugString(buf);

	for (i = 0; i < GetTextureStageCount(); ++i)
	{
		m = &GetTransform(MATERIAL_TEXTURE0 + i);
		sprintf(buf,"TexMat%d [%4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f %4.3f]\n",
			i, m->m[0][0],	m->m[0][1],	m->m[0][2],	
			m->m[1][0],	m->m[1][1],	m->m[1][2],
			m->m[2][0],	m->m[2][1],	m->m[2][2],
			m->m[3][0], m->m[3][1], m->m[3][2] );
		Plat_DebugString(buf);
	}
	*/

	sprintf(buf,"Viewport (%d %d) [%d %d] %4.3f %4.3f\n",
		m_DynamicState.m_Viewport.X, m_DynamicState.m_Viewport.Y, 
		m_DynamicState.m_Viewport.Width, m_DynamicState.m_Viewport.Height,
		m_DynamicState.m_Viewport.MinZ, m_DynamicState.m_Viewport.MaxZ);
	Plat_DebugString(buf);

	for (i = 0; i < MAX_TEXTURE_STAGES; ++i)
	{
		sprintf(buf,"Stage %d :\n", i);
		Plat_DebugString(buf);
		sprintf(buf,"   Color Op: %d (%s) Color Arg1: %d (%s)",
			boardState.m_TextureStage[i].m_ColorOp, 
			TextureOpToString( boardState.m_TextureStage[i].m_ColorOp ),
			boardState.m_TextureStage[i].m_ColorArg1, 
			TextureArgToString( boardState.m_TextureStage[i].m_ColorArg1 ) );
		Plat_DebugString(buf);
		sprintf( buf, " Color Arg2: %d (%s)\n",
			boardState.m_TextureStage[i].m_ColorArg2,
			TextureArgToString( boardState.m_TextureStage[i].m_ColorArg2 ) );
		Plat_DebugString(buf);
		sprintf(buf,"   Alpha Op: %d (%s) Alpha Arg1: %d (%s)",
			boardState.m_TextureStage[i].m_AlphaOp, 
			TextureOpToString( boardState.m_TextureStage[i].m_AlphaOp ),
			boardState.m_TextureStage[i].m_AlphaArg1, 
			TextureArgToString( boardState.m_TextureStage[i].m_AlphaArg1 ) );
		Plat_DebugString(buf);
		sprintf(buf," Alpha Arg2: %d (%s)\n",
			boardState.m_TextureStage[i].m_AlphaArg2,
			TextureArgToString( boardState.m_TextureStage[i].m_AlphaArg2 ) );
		Plat_DebugString(buf);
	}

	for ( int i = 0; i < MAX_SAMPLERS; ++i )
	{
		sprintf(buf,"	Texture Enabled: %d Bound Texture: %d UWrap: %d VWrap: %d\n",
			SamplerState(i).m_TextureEnable, GetBoundTextureBindId( (Sampler_t)i ),
			SamplerState(i).m_UTexWrap, SamplerState(i).m_VTexWrap );
		Plat_DebugString(buf);
		sprintf(buf,"	Mag Filter: %d Min Filter: %d Mip Filter: %d\n",
			SamplerState(i).m_MagFilter, SamplerState(i).m_MinFilter,
			SamplerState(i).m_MipFilter );
		sprintf(buf,"   MaxMipLevel: %d\n", SamplerState(i).m_FinestMipmapLevel );
		Plat_DebugString(buf);
	}
#else
	Plat_DebugString("::SpewBoardState() Not Implemented Yet");
#endif
}

//-----------------------------------------------------------------------------
// Begin a render pass
//-----------------------------------------------------------------------------
void CShaderAPIDx8::BeginPass( StateSnapshot_t snapshot )
{
	LOCK_SHADERAPI();
	VPROF("CShaderAPIDx8::BeginPass");
	if (IsDeactivated())
		return;

	m_nCurrentSnapshot = snapshot;
//	Assert( m_pRenderMesh );
	// FIXME: This only does anything with temp meshes, so don't bother yet for the new code.
	if( m_pRenderMesh )
	{
		m_pRenderMesh->BeginPass( );
	}
}


//-----------------------------------------------------------------------------
// Render da polygon!
//-----------------------------------------------------------------------------
void CShaderAPIDx8::RenderPass( int nPass, int nPassCount )
{
	if ( IsDeactivated() )
		return;

	Assert( m_nCurrentSnapshot != -1 );
//	Assert( m_pRenderMesh );  MESHFIXME

	m_TransitionTable.UseSnapshot( m_nCurrentSnapshot );
	CommitPerPassStateChanges( m_nCurrentSnapshot );

	// Make sure that we bound a texture for every stage that is enabled
	// NOTE: not enabled/finished yet... see comment in CShaderAPIDx8::ApplyTextureEnable
//	int nSampler;
//	for ( nSampler = 0; nSampler < g_pHardwareConfig->GetSamplerCount(); nSampler++ )
//	{
//		if ( SamplerState( nSampler ).m_TextureEnable )
//		{
//		}
//	}
	
#ifdef DEBUG_BOARD_STATE
	// Spew out render state...
	if ( m_pMaterial->PerformDebugTrace() )
	{
		SpewBoardState();
	}
#endif

#ifdef TEST_CACHE_LOCKS
	g_pDataCache->Flush();
#endif

//	Assert( m_pRenderMesh );  MESHFIXME
	if ( m_pRenderMesh )
	{
		m_pRenderMesh->RenderPass();
	}
	else
	{
		MeshMgr()->RenderPassWithVertexAndIndexBuffers();
	}
	m_nCurrentSnapshot = -1;
}


//-----------------------------------------------------------------------------
// Matrix mode
//-----------------------------------------------------------------------------
void CShaderAPIDx8::MatrixMode( MaterialMatrixMode_t matrixMode )
{
	// NOTE!!!!!!
	// The only time that m_MatrixMode is used is for texture matrices.  Do not use
	// it for anything else unless you change this code!
	if ( matrixMode >= MATERIAL_TEXTURE0 && matrixMode <= MATERIAL_TEXTURE7 )
	{
		m_MatrixMode = ( D3DTRANSFORMSTATETYPE )( matrixMode - MATERIAL_TEXTURE0 + D3DTS_TEXTURE0 );
	}
	else
	{
		m_MatrixMode = (D3DTRANSFORMSTATETYPE)-1;
	}

	m_CurrStack = GetMatrixStack( matrixMode );
}
				 	    
//-----------------------------------------------------------------------------
// the current camera position in world space.
//-----------------------------------------------------------------------------
void CShaderAPIDx8::GetWorldSpaceCameraPosition( float* pPos ) const
{
	memcpy( pPos, m_WorldSpaceCameraPositon.Base(), sizeof( float[3] ) );
}

void CShaderAPIDx8::CacheWorldSpaceCameraPosition()
{
	D3DXMATRIX& view = GetTransform(MATERIAL_VIEW);
	m_WorldSpaceCameraPositon[0] = 
		-( view( 3, 0 ) * view( 0, 0 ) + 
		   view( 3, 1 ) * view( 0, 1 ) + 
		   view( 3, 2 ) * view( 0, 2 ) );
	m_WorldSpaceCameraPositon[1] = 
		-( view( 3, 0 ) * view( 1, 0 ) + 
		   view( 3, 1 ) * view( 1, 1 ) + 
		   view( 3, 2 ) * view( 1, 2 ) );
	m_WorldSpaceCameraPositon[2] = 
		-( view( 3, 0 ) * view( 2, 0 ) + 
		   view( 3, 1 ) * view( 2, 1 ) + 
		   view( 3, 2 ) * view( 2, 2 ) );
	m_WorldSpaceCameraPositon[3] = 1.0f;

	// Protect against zero, as some pixel shaders will divide by this in CalcWaterFogAlpha() in common_ps_fxc.h
	if ( fabs( m_WorldSpaceCameraPositon[2] ) <= 0.00001f )
	{
		m_WorldSpaceCameraPositon[2] = 0.01f;
	}
}


//-----------------------------------------------------------------------------
// Computes a matrix which includes the poly offset given an initial projection matrix
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ComputePolyOffsetMatrix( const D3DXMATRIX& matProjection, D3DXMATRIX &matProjectionOffset )
{
	// We never need to do this on hardware that can handle zbias
	if ( g_pHardwareConfig->Caps().m_ZBiasAndSlopeScaledDepthBiasSupported )
		return;

	float offsetVal = 
		-1.0f * (m_DesiredState.m_Viewport.MaxZ - m_DesiredState.m_Viewport.MinZ) /
		16384.0f;

	D3DXMATRIX offset;
	D3DXMatrixTranslation( &offset, 0.0f, 0.0f, offsetVal );
	D3DXMatrixMultiply( &matProjectionOffset, &matProjection, &offset );
}


//-----------------------------------------------------------------------------
// Caches off the poly-offset projection matrix
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CachePolyOffsetProjectionMatrix()
{
	ComputePolyOffsetMatrix( GetTransform(MATERIAL_PROJECTION), m_CachedPolyOffsetProjectionMatrix );
}


//-----------------------------------------------------------------------------
// Performs a flush on the matrix state	if necessary
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::MatrixIsChanging( TransformType_t type )
{
	if ( IsDeactivated() )	
	{
		return false;
	}

	// early out if the transform is already one of our standard types
	if ((type != TRANSFORM_IS_GENERAL) && (type == m_DynamicState.m_TransformType[m_CurrStack]))
		return false;

	// Only flush state if we're changing something other than a texture transform
	int textureMatrix = m_CurrStack - MATERIAL_TEXTURE0;
	if (( textureMatrix < 0 ) || (textureMatrix >= NUM_TEXTURE_TRANSFORMS))
		FlushBufferedPrimitivesInternal();

	return true;
}

void CShaderAPIDx8::SetTextureTransformDimension( TextureStage_t textureMatrix, int dimension, bool projected )
{
	D3DTEXTURETRANSFORMFLAGS textureTransformFlags = ( D3DTEXTURETRANSFORMFLAGS )dimension;
	if( projected )
	{
		Assert( sizeof( int ) == sizeof( D3DTEXTURETRANSFORMFLAGS ) );
		( *( int * )&textureTransformFlags ) |= D3DTTFF_PROJECTED;
	}

	if (TextureStage(textureMatrix).m_TextureTransformFlags != textureTransformFlags )
	{
		SetTextureStageState( textureMatrix, D3DTSS_TEXTURETRANSFORMFLAGS, textureTransformFlags );
		TextureStage(textureMatrix).m_TextureTransformFlags = textureTransformFlags;
	}
}

void CShaderAPIDx8::DisableTextureTransform( TextureStage_t textureMatrix )
{
	if (TextureStage(textureMatrix).m_TextureTransformFlags != D3DTTFF_DISABLE )
	{
		SetTextureStageState( textureMatrix, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE );
		TextureStage(textureMatrix).m_TextureTransformFlags = D3DTTFF_DISABLE;
	}
}

void CShaderAPIDx8::SetBumpEnvMatrix( TextureStage_t textureStage, float m00, float m01, float m10, float m11 )
{	
	TextureStageState_t &textureStageState = TextureStage( textureStage );
	
	if( textureStageState.m_BumpEnvMat00 != m00 ||
		textureStageState.m_BumpEnvMat01 != m01 ||
		textureStageState.m_BumpEnvMat10 != m10 ||
		textureStageState.m_BumpEnvMat11 != m11 )
	{
		SetTextureStageState( textureStage, D3DTSS_BUMPENVMAT00, *( ( LPDWORD ) (&m00) ) );
		SetTextureStageState( textureStage, D3DTSS_BUMPENVMAT01, *( ( LPDWORD ) (&m01) ) );
		SetTextureStageState( textureStage, D3DTSS_BUMPENVMAT10, *( ( LPDWORD ) (&m10) ) );
		SetTextureStageState( textureStage, D3DTSS_BUMPENVMAT11, *( ( LPDWORD ) (&m11) ) );
		textureStageState.m_BumpEnvMat00 = m00;
		textureStageState.m_BumpEnvMat01 = m01;
		textureStageState.m_BumpEnvMat10 = m10;
		textureStageState.m_BumpEnvMat11 = m11;
	}
}

//-----------------------------------------------------------------------------
// Sets the actual matrix state
//-----------------------------------------------------------------------------
void CShaderAPIDx8::UpdateMatrixTransform( TransformType_t type )
{
	int textureMatrix = m_CurrStack - MATERIAL_TEXTURE0;
	if (( textureMatrix >= 0 ) && (textureMatrix < NUM_TEXTURE_TRANSFORMS))
	{
		// NOTE: Flush shouldn't happen here because we
		// expect that texture transforms will be set within the shader

		// FIXME: We only want to use D3DTTFF_COUNT3 for cubemaps
		// D3DTFF_COUNT2 is used for non-cubemaps. Of course, if there's
		// no performance penalty for COUNT3, we should just use that.
		D3DTEXTURETRANSFORMFLAGS transformFlags;
		transformFlags = (type == TRANSFORM_IS_IDENTITY) ? D3DTTFF_DISABLE : D3DTTFF_COUNT3;

		if (TextureStage(textureMatrix).m_TextureTransformFlags != transformFlags )
		{
			SetTextureStageState( textureMatrix, D3DTSS_TEXTURETRANSFORMFLAGS, transformFlags );
			TextureStage(textureMatrix).m_TextureTransformFlags = transformFlags;
		}
	}

	m_DynamicState.m_TransformType[m_CurrStack] = type;
	m_DynamicState.m_TransformChanged[m_CurrStack] = STATE_CHANGED;

#ifdef _DEBUG
	// Store off the board state
	D3DXMATRIX *pSrc = &GetTransform(m_CurrStack);
	D3DXMATRIX *pDst = &m_DynamicState.m_Transform[m_CurrStack];
//	Assert( *pSrc != *pDst );
	memcpy( pDst, pSrc, sizeof(D3DXMATRIX) );
#endif

	if ( m_CurrStack == MATERIAL_VIEW )
	{
		CacheWorldSpaceCameraPosition();
	}

	if ( !IsX360() && m_CurrStack == MATERIAL_PROJECTION )
	{
		CachePolyOffsetProjectionMatrix();
	}

	// Any time the view or projection matrix changes, the user clip planes need recomputing....
	// Assuming we're not overriding the user clip transform
	if ( ( m_CurrStack == MATERIAL_PROJECTION ) ||
		 ( ( m_CurrStack == MATERIAL_VIEW ) && ( !m_DynamicState.m_bUserClipTransformOverride ) ) )
	{
		MarkAllUserClipPlanesDirty();
	}

	// Set the state if it's a texture transform
	if ( (m_CurrStack >= MATERIAL_TEXTURE0) && (m_CurrStack <= MATERIAL_TEXTURE7) )
	{
		SetTransform( m_MatrixMode, &GetTransform(m_CurrStack) );
	}
}


//--------------------------------------------------------------------------------
// deformations
//--------------------------------------------------------------------------------
void CShaderAPIDx8::PushDeformation( DeformationBase_t const *pDef )
{
	Assert( m_pDeformationStackPtr > m_DeformationStack );
	--m_pDeformationStackPtr;
	m_pDeformationStackPtr->m_nDeformationType = pDef->m_eType;

	switch( pDef->m_eType )
	{
		case DEFORMATION_CLAMP_TO_BOX_IN_WORLDSPACE:
		{
			BoxDeformation_t const *pBox = reinterpret_cast< const BoxDeformation_t *>( pDef );
			m_pDeformationStackPtr->m_nNumParameters = 16;
			memcpy( m_pDeformationStackPtr->m_flDeformationParameters, &( pBox->m_SourceMins.x ), 16 * sizeof( float ) );
			break;
		}
		break;
		
		default:
			Assert( 0 );
	}
}


void CShaderAPIDx8::PopDeformation( )
{
	Assert( m_pDeformationStackPtr != m_DeformationStack + DEFORMATION_STACK_DEPTH );
	++m_pDeformationStackPtr;
}

int CShaderAPIDx8::GetNumActiveDeformations( void ) const
{
	return ( m_DeformationStack + DEFORMATION_STACK_DEPTH ) - m_pDeformationStackPtr;
}


// for shaders to set vertex shader constants. returns a packed state which can be used to set the dynamic combo
int CShaderAPIDx8::GetPackedDeformationInformation( int nMaskOfUnderstoodDeformations,
													 float *pConstantValuesOut,
													 int nBufferSize,
													 int nMaximumDeformations,
													 int *pDefCombosOut ) const
{
	int nCombosFound = 0;
	memset( pDefCombosOut, 0, sizeof( pDefCombosOut[0] ) * nMaximumDeformations );
	size_t nRemainingBufferSize = nBufferSize;

	for( const Deformation_t *i = m_DeformationStack + DEFORMATION_STACK_DEPTH -1; i >= m_pDeformationStackPtr; i-- )
	{
		int nFloatsOut = 4 * ( ( i->m_nNumParameters + 3 )>> 2 );
		if ( 
			( ( 1 << i->m_nDeformationType )  & nMaskOfUnderstoodDeformations ) &&
			( nRemainingBufferSize >= ( nFloatsOut * sizeof( float ) ) ) )
		{
			memcpy( pConstantValuesOut, i->m_flDeformationParameters, nFloatsOut * sizeof( float ) );
			pConstantValuesOut += nFloatsOut;
			nRemainingBufferSize -= nFloatsOut * sizeof( float );
			( *pDefCombosOut++ ) = i->m_nDeformationType;
			nCombosFound++;
		}
	}
	return nCombosFound;
}




//-----------------------------------------------------------------------------
// Matrix stack operations
//-----------------------------------------------------------------------------

void CShaderAPIDx8::PushMatrix()
{
	// NOTE: No matrix transform update needed here.
	m_pMatrixStack[m_CurrStack]->Push();
}

void CShaderAPIDx8::PopMatrix()
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->Pop();
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::LoadIdentity( )
{
	if (MatrixIsChanging(TRANSFORM_IS_IDENTITY))
	{
		m_pMatrixStack[m_CurrStack]->LoadIdentity( );
		UpdateMatrixTransform( TRANSFORM_IS_IDENTITY );
	}
}

void CShaderAPIDx8::LoadCameraToWorld( )
{
	if (MatrixIsChanging(TRANSFORM_IS_CAMERA_TO_WORLD))
	{
		// could just use the transpose instead, if we know there's no scale
		float det;
		D3DXMATRIX inv;
		D3DXMatrixInverse( &inv, &det, &GetTransform(MATERIAL_VIEW) );

		// Kill translation
		inv.m[3][0] = inv.m[3][1] = inv.m[3][2] = 0.0f;

		m_pMatrixStack[m_CurrStack]->LoadMatrix( &inv );
		UpdateMatrixTransform( TRANSFORM_IS_CAMERA_TO_WORLD );
	}
}

void CShaderAPIDx8::LoadMatrix( float *m )
{
	// Check for identity...
	if ( (fabs(m[0] - 1.0f) < 1e-3) && (fabs(m[5] - 1.0f) < 1e-3) && (fabs(m[10] - 1.0f) < 1e-3) && (fabs(m[15] - 1.0f) < 1e-3) &&
		 (fabs(m[1]) < 1e-3)  && (fabs(m[2]) < 1e-3)  && (fabs(m[3]) < 1e-3) &&
		 (fabs(m[4]) < 1e-3)  && (fabs(m[6]) < 1e-3)  && (fabs(m[7]) < 1e-3) &&
		 (fabs(m[8]) < 1e-3)  && (fabs(m[9]) < 1e-3)  && (fabs(m[11]) < 1e-3) &&
		 (fabs(m[12]) < 1e-3) && (fabs(m[13]) < 1e-3) && (fabs(m[14]) < 1e-3) )
	{
		LoadIdentity();
		return;
	}

	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->LoadMatrix( (D3DXMATRIX*)m );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::LoadBoneMatrix( int boneIndex, const float *m )
{
	if ( IsDeactivated() )	
		return;

	memcpy( m_boneMatrix[boneIndex].Base(), m, sizeof(float)*12 );
	if ( boneIndex > m_maxBoneLoaded )
	{
		m_maxBoneLoaded = boneIndex;
	}
	if ( boneIndex == 0 )
	{
		MatrixMode( MATERIAL_MODEL );
		VMatrix transposeMatrix;
		transposeMatrix.Init( *(matrix3x4_t *)m );
		MatrixTranspose( transposeMatrix, transposeMatrix );
		LoadMatrix( (float*)transposeMatrix.m );
	}
}

//-----------------------------------------------------------------------------
// Commits morph target factors
//-----------------------------------------------------------------------------
static void CommitFlexWeights( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, 
									 DynamicState_t &currentState, bool bForce )
{
	if ( IsX360() )
	{
		// not supporting for 360
		return;
	}

	CommitVertexShaderConstantRange( pDevice, desiredState, currentState, bForce,
		VERTEX_SHADER_FLEX_WEIGHTS, VERTEX_SHADER_MAX_FLEX_WEIGHT_COUNT );
}

void CShaderAPIDx8::SetFlexWeights( int nFirstWeight, int nCount, const MorphWeight_t* pWeights )
{
	if ( IsX360() )
	{
		// not supported for 360
		return;
	}

	LOCK_SHADERAPI();
	if ( g_pHardwareConfig->Caps().m_NumVertexShaderConstants < VERTEX_SHADER_FLEX_WEIGHTS + VERTEX_SHADER_MAX_FLEX_WEIGHT_COUNT )
		return;

	if ( nFirstWeight + nCount > VERTEX_SHADER_MAX_FLEX_WEIGHT_COUNT )	
	{
		Warning( "Attempted to set too many flex weights! Max is %d\n", VERTEX_SHADER_MAX_FLEX_WEIGHT_COUNT );
		nCount = VERTEX_SHADER_MAX_FLEX_WEIGHT_COUNT - nFirstWeight;
	}

	if ( nCount <= 0 )
		return;

	float *pDest = m_DesiredState.m_pVectorVertexShaderConstant[ VERTEX_SHADER_FLEX_WEIGHTS + nFirstWeight ].Base();
	memcpy( pDest, pWeights, nCount * sizeof(MorphWeight_t) );

	ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_VERTEX_SHADER, CommitFlexWeights );
}

void CShaderAPIDx8::MultMatrix( float *m )
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->MultMatrix( (D3DXMATRIX*)m );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::MultMatrixLocal( float *m )
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->MultMatrixLocal( (D3DXMATRIX*)m );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::Rotate( float angle, float x, float y, float z )
{
	if (MatrixIsChanging())
	{
		D3DXVECTOR3 axis( x, y, z );
		m_pMatrixStack[m_CurrStack]->RotateAxisLocal( &axis, M_PI * angle / 180.0f );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::Translate( float x, float y, float z )
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->TranslateLocal( x, y, z );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::Scale( float x, float y, float z )
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->ScaleLocal( x, y, z );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::ScaleXY( float x, float y )
{
	if (MatrixIsChanging())
	{
		m_pMatrixStack[m_CurrStack]->ScaleLocal( x, y, 1.0f );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::Ortho( double left, double top, double right, double bottom, double zNear, double zFar )
{
	if (MatrixIsChanging())
	{
		D3DXMATRIX matrix;

		// FIXME: This is being used incorrectly! Should read:
		// D3DXMatrixOrthoOffCenterRH( &matrix, left, right, bottom, top, zNear, zFar );
		// Which is certainly why we need these extra -1 scales in y. Bleah

		// NOTE: The camera can be imagined as the following diagram:
		//		/z
		//	   /
		//	  /____ x	Z is going into the screen
		//	  |
		//	  |
		//	  |y
		//
		// (0,0,z) represents the upper-left corner of the screen.
		// Our projection transform needs to transform from this space to a LH coordinate
		// system that looks thusly:
		// 
		//	y|  /z
		//	 | /
		//	 |/____ x	Z is going into the screen
		//
		// Where x,y lies between -1 and 1, and z lies from 0 to 1
		// This is because the viewport transformation from projection space to pixels
		// introduces a -1 scale in the y coordinates
//		D3DXMatrixOrthoOffCenterLH( &matrix, left, right, bottom, top, zNear, zFar );

		D3DXMatrixOrthoOffCenterRH( &matrix, left, right, top, bottom, zNear, zFar );
		m_pMatrixStack[m_CurrStack]->MultMatrixLocal(&matrix);
		Assert( m_CurrStack == MATERIAL_PROJECTION );
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::PerspectiveX( double fovx, double aspect, double zNear, double zFar )
{
	if (MatrixIsChanging())
	{
		float width = 2 * zNear * tan( fovx * M_PI / 360.0 );
		float height = width / aspect;
		Assert( m_CurrStack == MATERIAL_PROJECTION );
		D3DXMATRIX rh;
		D3DXMatrixPerspectiveRH( &rh, width, height, zNear, zFar );
		m_pMatrixStack[m_CurrStack]->MultMatrixLocal(&rh);
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::PerspectiveOffCenterX( double fovx, double aspect, double zNear, double zFar, double bottom, double top, double left, double right )
{
	if (MatrixIsChanging())
	{
		float width = 2 * zNear * tan( fovx * M_PI / 360.0 );
		float height = width / aspect;

		// bottom, top, left, right are 0..1 so convert to -1..1
		float flFrontPlaneLeft   = -(width/2.0f)  * (1.0f - left)   + left   * (width/2.0f);
		float flFrontPlaneRight  = -(width/2.0f)  * (1.0f - right)  + right  * (width/2.0f);
		float flFrontPlaneBottom = -(height/2.0f) * (1.0f - bottom) + bottom * (height/2.0f);
		float flFrontPlaneTop    = -(height/2.0f) * (1.0f - top)    + top    * (height/2.0f);

		Assert( m_CurrStack == MATERIAL_PROJECTION );
		D3DXMATRIX rh;
		D3DXMatrixPerspectiveOffCenterRH( &rh, flFrontPlaneLeft, flFrontPlaneRight, flFrontPlaneBottom, flFrontPlaneTop, zNear, zFar );
		m_pMatrixStack[m_CurrStack]->MultMatrixLocal(&rh);
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::PickMatrix( int x, int y, int width, int height )
{
	if (MatrixIsChanging())
	{
		Assert( m_CurrStack == MATERIAL_PROJECTION );

		// This is going to create a matrix to append to the standard projection.
		// Projection space goes from -1 to 1 in x and y. This matrix we append
		// will transform the pick region to -1 to 1 in projection space
		ShaderViewport_t viewport;
		GetViewports( &viewport, 1 );
		
		int vx = viewport.m_nTopLeftX;
		int vy = viewport.m_nTopLeftX;
		int vwidth = viewport.m_nWidth;
		int vheight = viewport.m_nHeight;

		// Compute the location of the pick region in projection space...
		float px = 2.0 * (float)(x - vx) / (float)vwidth - 1;
		float py = 2.0 * (float)(y - vy)/ (float)vheight - 1;
		float pw = 2.0 * (float)width / (float)vwidth;
		float ph = 2.0 * (float)height / (float)vheight;

		// we need to translate (px, py) to the origin
		// and scale so (pw,ph) -> (2, 2)
		D3DXMATRIX matrix;
		D3DXMatrixIdentity( &matrix );
		matrix.m[0][0] = 2.0 / pw;
		matrix.m[1][1] = 2.0 / ph;
		matrix.m[3][0] = -2.0 * px / pw;
		matrix.m[3][1] = -2.0 * py / ph;

		m_pMatrixStack[m_CurrStack]->MultMatrixLocal(&matrix);
		UpdateMatrixTransform();
	}
}

void CShaderAPIDx8::GetMatrix( MaterialMatrixMode_t matrixMode, float *dst )
{
	memcpy( dst, (void*)(FLOAT*)GetTransform(matrixMode), sizeof(D3DXMATRIX) );
}


//-----------------------------------------------------------------------------
// Did a transform change?
//-----------------------------------------------------------------------------
inline bool CShaderAPIDx8::VertexShaderTransformChanged( int i )
{
	return (m_DynamicState.m_TransformChanged[i] & STATE_CHANGED_VERTEX_SHADER) != 0;
}

inline bool CShaderAPIDx8::FixedFunctionTransformChanged( int i )
{
	return (m_DynamicState.m_TransformChanged[i] & STATE_CHANGED_FIXED_FUNCTION) != 0;
}


const D3DXMATRIX &CShaderAPIDx8::GetProjectionMatrix( void )
{
	bool bUsingZBiasProjectionMatrix = 
		!g_pHardwareConfig->Caps().m_ZBiasAndSlopeScaledDepthBiasSupported &&
		( m_TransitionTable.CurrentSnapshot() != -1 ) &&
		m_TransitionTable.CurrentShadowState() &&
		m_TransitionTable.CurrentShadowState()->m_ZBias;

	if ( !m_DynamicState.m_FastClipEnabled )
	{
		if ( bUsingZBiasProjectionMatrix )
			return m_CachedPolyOffsetProjectionMatrix;

		return GetTransform( MATERIAL_PROJECTION );
	}
	
	if ( bUsingZBiasProjectionMatrix )
		return m_CachedFastClipPolyOffsetProjectionMatrix;

	return m_CachedFastClipProjectionMatrix;
}


//-----------------------------------------------------------------------------
// Workaround hack for visualization of selection mode
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetupSelectionModeVisualizationState()
{
	Dx9Device()->SetRenderState( D3DRS_CULLMODE, D3DCULL_NONE );

	D3DXMATRIX ident;
	D3DXMatrixIdentity( &ident );
	SetTransform( D3DTS_WORLD, &ident );
	SetTransform( D3DTS_VIEW, &ident );
	SetTransform( D3DTS_PROJECTION, &ident );

	if ( g_pHardwareConfig->Caps().m_SupportsPixelShaders )
	{
		SetVertexShaderConstant( VERTEX_SHADER_VIEWPROJ, ident, 4 );
		SetVertexShaderConstant( VERTEX_SHADER_MODELVIEWPROJ, ident, 4 );
		float *pRowTwo = (float *)ident + 8;
		SetVertexShaderConstant( VERTEX_SHADER_MODELVIEWPROJ_THIRD_ROW, pRowTwo, 1 ); // Row two of an identity matrix
		SetVertexShaderConstant( VERTEX_SHADER_MODEL, ident, 3 * NUM_MODEL_TRANSFORMS );
	}
}


//-----------------------------------------------------------------------------
// Set view transforms
//-----------------------------------------------------------------------------

static void printmat4x4( char *label, float *m00 )
{
	// print label..
	// fetch 4 from row, print as a row
	// fetch 4 from column, print as a row
	
#ifdef DX_TO_GL_ABSTRACTION
	float	row[4];
	float	col[4];
	
	GLMPRINTF(("-M-    -- %s --", label ));
	for( int n=0; n<4; n++ )
	{
		// extract row and column floats
		for( int i=0; i<4;i++)
		{
			row[i] = m00[(n*4)+i];			
			col[i] = m00[(i*4)+n];
		}
		GLMPRINTF((		"-M-    [ %7.4f %7.4f %7.4f %7.4f ] T=> [ %7.4f %7.4f %7.4f %7.4f ]",
						row[0],row[1],row[2],row[3],
						col[0],col[1],col[2],col[3]						
						));
	}
	GLMPRINTF(("-M-"));
#endif
}

void CShaderAPIDx8::SetVertexShaderViewProj()
{
	//GLM_FUNC;
	//GLMPRINTF(( ">-M- SetVertexShaderViewProj" ));
	
	if (g_pHardwareConfig->Caps().m_SupportsPixelShaders)
	{
		D3DXMATRIX transpose;
		if(0)
		{
			transpose = GetTransform(MATERIAL_VIEW) * GetProjectionMatrix();
			D3DXMatrixTranspose( &transpose, &transpose );
		}
		else
		{
			// show work
			D3DXMATRIX	matView,matProj;
			
			matView		= GetTransform(MATERIAL_VIEW);
			matProj		= GetProjectionMatrix();
			transpose	= matView * matProj;
			
			//printmat4x4( "matView", (float*)&matView );
			//printmat4x4( "matProj", (float*)&matProj );
			//printmat4x4( "result (view * proj) pre-transpose", (float*)&transpose );

			D3DXMatrixTranspose( &transpose, &transpose );

			#if 0	// turned off while we try to do fixup-Y in shader translate
				if (IsPosix())			// flip all shader projection matrices for Y on GL since you can't have an upside-down viewport specification
				{
					// flip Y
					transpose._21 *= -1.0f;
					transpose._22 *= -1.0f;
					transpose._23 *= -1.0f;
					transpose._24 *= -1.0f;
				}
			#endif
			
			//printmat4x4( "result (view * proj) post-transpose", (float*)&transpose );
		}

		
		SetVertexShaderConstant( VERTEX_SHADER_VIEWPROJ, transpose, 4 );

		// If we're doing FastClip, the above viewproj matrix won't work well for
		// vertex shaders which compute projPos.z, hence we'll compute a more useful
		// viewproj and put the third row of it in another constant
		transpose = GetTransform( MATERIAL_VIEW ) * GetTransform( MATERIAL_PROJECTION ); // Get the non-FastClip projection matrix
		D3DXMatrixTranspose( &transpose, &transpose );

		float *pRowTwo = (float *)transpose + 8;
		SetVertexShaderConstant( VERTEX_SHADER_VIEWPROJ_THIRD_ROW, pRowTwo, 1 );
	}
	//GLMPRINTF(( "<-M- SetVertexShaderViewProj" ));
}

void CShaderAPIDx8::SetVertexShaderModelViewProjAndModelView( void )
{
	//GLM_FUNC;
	//GLMPRINTF(( ">-M- SetVertexShaderModelViewProjAndModelView" ));
	
	if (g_pHardwareConfig->Caps().m_SupportsPixelShaders)
	{
		D3DXMATRIX modelView, transpose;
		
		if (0)
		{
			D3DXMatrixMultiply( &modelView, &GetTransform(MATERIAL_MODEL), &GetTransform(MATERIAL_VIEW) );
			D3DXMatrixMultiply( &transpose, &modelView, &GetProjectionMatrix() );
		}
		else
		{
			// show work
			D3DXMATRIX	matView,matProj,matModel;
			
			matModel	= GetTransform(MATERIAL_MODEL);
			matView		= GetTransform(MATERIAL_VIEW);
			matProj		= GetProjectionMatrix();
			
			D3DXMatrixMultiply( &modelView, &matModel, &matView );
			D3DXMatrixMultiply( &transpose, &modelView, &matProj );
			
			//printmat4x4( "matModel", (float*)&matModel );
			//printmat4x4( "matView", (float*)&matView );
			//printmat4x4( "matProj", (float*)&matProj );
			//printmat4x4( "result (model * view * proj) pre-transpose", (float*)&transpose );
		}

		D3DXMatrixTranspose( &transpose, &transpose );
		
		#if 0	// turned off while we try to do fixup-Y in shader translate
			if (IsPosix())			// flip all shader projection matrices for Y on GL since you can't have an upside-down viewport specification
			{
				// flip Y
				transpose._21 *= -1.0f;
				transpose._22 *= -1.0f;
				transpose._23 *= -1.0f;
				transpose._24 *= -1.0f;
			}
		#endif
		
		SetVertexShaderConstant( VERTEX_SHADER_MODELVIEWPROJ, transpose, 4 );

		// If we're doing FastClip, the above modelviewproj matrix won't work well for
		// vertex shaders which compute projPos.z, hence we'll compute a more useful
		// modelviewproj and put the third row of it in another constant
		D3DXMatrixMultiply( &transpose, &modelView, &GetTransform( MATERIAL_PROJECTION ) ); // Get the non-FastClip projection matrix
		D3DXMatrixTranspose( &transpose, &transpose );

		float *pRowTwo = (float *)transpose + 8;
		SetVertexShaderConstant( VERTEX_SHADER_MODELVIEWPROJ_THIRD_ROW, pRowTwo, 1 );
	}
	
	//GLMPRINTF(( "<-M- SetVertexShaderModelViewProjAndModelView" ));
}

void CShaderAPIDx8::UpdateVertexShaderMatrix( int iMatrix )
{
	//GLM_FUNC;
	if ( iMatrix == 0 )
	{
		int matrix = MATERIAL_MODEL;
		if (VertexShaderTransformChanged(matrix))
		{
			int vertexShaderConstant = VERTEX_SHADER_MODEL + iMatrix * 3;

			// Put the transform into the vertex shader constants...
			D3DXMATRIX transpose;
			D3DXMatrixTranspose( &transpose, &GetTransform(matrix) );
			SetVertexShaderConstant( vertexShaderConstant, transpose, 3 );

			// clear the change flag
			m_DynamicState.m_TransformChanged[matrix] &= ~STATE_CHANGED_VERTEX_SHADER;
		}
	}
	else
	{
		SetVertexShaderConstant( VERTEX_SHADER_MODEL + iMatrix, m_boneMatrix[iMatrix].Base(), 3 );
	}
}


void CShaderAPIDx8::SetVertexShaderStateSkinningMatrices()
{
	//GLM_FUNC;
	// casting from 4x3 matrices to a 4x4 D3DXMATRIX, need 4 floats of overflow
	float results[12+4];

	// get the first one from the MATERIAL_MODEL matrix stack
	D3DXMatrixTranspose( (D3DXMATRIX *)&results[0], &GetTransform( MATERIAL_MODEL ) );
	memcpy( m_boneMatrix[0].Base(), results, 12 * sizeof(float) );

	m_maxBoneLoaded++;
	int matricesLoaded = max( 1, m_maxBoneLoaded );
	m_maxBoneLoaded = 0;

	m_DynamicState.m_TransformChanged[MATERIAL_MODEL] &= ~STATE_CHANGED_VERTEX_SHADER;
	SetVertexShaderConstant( VERTEX_SHADER_MODEL, m_boneMatrix[0].Base(), matricesLoaded * 3, true );

	// ###OSX### punting on OSX for now		
#if defined( DX_TO_GL_ABSTRACTION ) && !defined( OSX )
	Dx9Device()->SetMaxUsedVertexShaderConstantsHint( VERTEX_SHADER_MODEL + ( matricesLoaded * 3 ) );
#endif

}

//-----------------------------------------------------------------------------
// Commits vertex shader transforms that can change on a per pass basis
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPerPassVertexShaderTransforms()
{
	//GLMPRINTF(( ">-M- CommitPerPassVertexShaderTransforms" ));
	Assert( g_pHardwareConfig->Caps().m_SupportsPixelShaders );

	bool projChanged = VertexShaderTransformChanged( MATERIAL_PROJECTION );
	//projChanged = true;	//only for debug
	if ( projChanged )
	{
		//GLMPRINTF(( "-M- projChanged=true in CommitPerPassVertexShaderTransforms" ));
		SetVertexShaderViewProj();
		SetVertexShaderModelViewProjAndModelView();

		// Clear change flags
		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] &= ~STATE_CHANGED_VERTEX_SHADER;
	}
	else
	{		
		//GLMPRINTF(( "-M- projChanged=false in CommitPerPassVertexShaderTransforms" ));
	}

	//GLMPRINTF(( "<-M- CommitPerPassVertexShaderTransforms" ));
}

//-----------------------------------------------------------------------------
// Commits vertex shader transforms
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitVertexShaderTransforms()
{
	//GLMPRINTF(( ">-M- CommitVertexShaderTransforms" ));

	Assert( g_pHardwareConfig->Caps().m_SupportsPixelShaders );

	bool viewChanged = VertexShaderTransformChanged(MATERIAL_VIEW);
	bool projChanged = VertexShaderTransformChanged(MATERIAL_PROJECTION);
	bool modelChanged = VertexShaderTransformChanged(MATERIAL_MODEL) && (m_DynamicState.m_NumBones < 1);

	//GLMPRINTF(( "-M-     viewChanged=%s  projChanged=%s  modelChanged = %s in CommitVertexShaderTransforms", viewChanged?"Y":"N",projChanged?"Y":"N",modelChanged?"Y":"N" ));
	if (viewChanged)
	{
		//GLMPRINTF(( "-M-       viewChanged --> UpdateVertexShaderFogParams" ));
		UpdateVertexShaderFogParams();
	}

	if( viewChanged || projChanged )
	{
		// NOTE: We have to deal with fast-clip *before* 
		//GLMPRINTF(( "-M-       viewChanged||projChanged --> SetVertexShaderViewProj" ));
		SetVertexShaderViewProj();
	}

	if( viewChanged || modelChanged || projChanged )
	{
		//GLMPRINTF(( "-M-       viewChanged||projChanged||modelChanged --> SetVertexShaderModelViewProjAndModelView" ));
		SetVertexShaderModelViewProjAndModelView();
	}

	if( modelChanged && m_DynamicState.m_NumBones < 1 )
	{
		UpdateVertexShaderMatrix( 0 );
	}

	// Clear change flags
	m_DynamicState.m_TransformChanged[MATERIAL_MODEL]		&= ~STATE_CHANGED_VERTEX_SHADER;
	m_DynamicState.m_TransformChanged[MATERIAL_VIEW]		&= ~STATE_CHANGED_VERTEX_SHADER;
	m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION]	&= ~STATE_CHANGED_VERTEX_SHADER;

	//GLMPRINTF(( "<-M- CommitVertexShaderTransforms" ));
}


void CShaderAPIDx8::UpdateFixedFunctionMatrix( int iMatrix )
{
	if ( IsX360() )
		return;

	int matrix = MATERIAL_MODEL + iMatrix;
	if ( FixedFunctionTransformChanged( matrix ) )
	{
		SetTransform( D3DTS_WORLDMATRIX(iMatrix), &GetTransform(matrix) );

		// clear the change flag
		m_DynamicState.m_TransformChanged[matrix] &= ~STATE_CHANGED_FIXED_FUNCTION;
	}
}


void CShaderAPIDx8::SetFixedFunctionStateSkinningMatrices()
{
	if ( IsX360() )
		return;

	for( int i=1; i < g_pHardwareConfig->MaxBlendMatrices(); i++ )
	{
		UpdateFixedFunctionMatrix( i );
	}
}

//-----------------------------------------------------------------------------
// Commits transforms for the fixed function pipeline that can happen on a per pass basis
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPerPassFixedFunctionTransforms()
{
	if ( IsX360() )
		return;

	// Update projection
	if ( FixedFunctionTransformChanged( MATERIAL_PROJECTION ) )
	{
		D3DTRANSFORMSTATETYPE matrix = D3DTS_PROJECTION;

		SetTransform( matrix, &GetProjectionMatrix() );
		// clear the change flag
		m_DynamicState.m_TransformChanged[MATERIAL_PROJECTION] &= ~STATE_CHANGED_FIXED_FUNCTION;
	}
}


//-----------------------------------------------------------------------------
// Commits transforms for the fixed function pipeline
//-----------------------------------------------------------------------------

void CShaderAPIDx8::CommitFixedFunctionTransforms()
{
	if ( IsX360() )
		return;

	// Update view + projection
	int i;
	for ( i = MATERIAL_VIEW; i <= MATERIAL_PROJECTION; ++i)
	{
		if (FixedFunctionTransformChanged( i ))
		{
			D3DTRANSFORMSTATETYPE matrix = (i == MATERIAL_VIEW) ? D3DTS_VIEW : D3DTS_PROJECTION;
			if ( i == MATERIAL_PROJECTION )
			{
				SetTransform( matrix, &GetProjectionMatrix() );
			}
			else
			{
				SetTransform( matrix, &GetTransform(i) );
			}

			// clear the change flag
			m_DynamicState.m_TransformChanged[i] &= ~STATE_CHANGED_FIXED_FUNCTION;
		}
	}

	UpdateFixedFunctionMatrix( 0 );
}


void CShaderAPIDx8::SetSkinningMatrices()
{
	LOCK_SHADERAPI();
	Assert( m_pMaterial );

	if ( m_DynamicState.m_NumBones == 0 )
	{
		// ###OSX### punting on OSX for now
#if defined( DX_TO_GL_ABSTRACTION ) && !defined( OSX)
		Dx9Device()->SetMaxUsedVertexShaderConstantsHint( VERTEX_SHADER_BONE_TRANSFORM( 0 ) + 3 );
#endif
		return;
	}
	
	uint nMaxVertexConstantIndex = 0;

	if ( IsX360() || UsesVertexShader(m_pMaterial->GetVertexFormat()) )
	{
		SetVertexShaderStateSkinningMatrices();
	}
	else if ( IsPC() )
	{
#if defined( DX_TO_GL_ABSTRACTION ) && !defined( OSX)
		Assert( 0 );
#else
		SetFixedFunctionStateSkinningMatrices();
#endif
	}
	else
	{
		Assert( 0 );
	}
}



//-----------------------------------------------------------------------------
// Commits vertex shader lighting
//-----------------------------------------------------------------------------

inline bool CShaderAPIDx8::VertexShaderLightingChanged( int i )
{
	return (m_DynamicState.m_LightChanged[i] & STATE_CHANGED_VERTEX_SHADER) != 0;
}

inline bool CShaderAPIDx8::VertexShaderLightingEnableChanged( int i )
{
	return (m_DynamicState.m_LightEnableChanged[i] & STATE_CHANGED_VERTEX_SHADER) != 0;
}

inline bool CShaderAPIDx8::FixedFunctionLightingChanged( int i )
{
	return (m_DynamicState.m_LightChanged[i] & STATE_CHANGED_FIXED_FUNCTION) != 0;
}

inline bool CShaderAPIDx8::FixedFunctionLightingEnableChanged( int i )
{
	return (m_DynamicState.m_LightEnableChanged[i] & STATE_CHANGED_FIXED_FUNCTION) != 0;
}


//-----------------------------------------------------------------------------
// Computes the light type
//-----------------------------------------------------------------------------

VertexShaderLightTypes_t CShaderAPIDx8::ComputeLightType( int i ) const
{
	if (!m_DynamicState.m_LightEnable[i])
		return LIGHT_NONE;

	switch( m_DynamicState.m_Lights[i].Type )
	{
	case D3DLIGHT_POINT:
		return LIGHT_POINT;

	case D3DLIGHT_DIRECTIONAL:
		return LIGHT_DIRECTIONAL;

	case D3DLIGHT_SPOT:
		return LIGHT_SPOT;
	}

	Assert(0);
	return LIGHT_NONE;
}


//-----------------------------------------------------------------------------
// Sort the lights by type
//-----------------------------------------------------------------------------

void CShaderAPIDx8::SortLights( int* index )
{
	m_DynamicState.m_NumLights = 0;

	for (int i = 0; i < MAX_NUM_LIGHTS; ++i)
	{
		VertexShaderLightTypes_t type = ComputeLightType(i);	// returns LIGHT_NONE if the light is disabled
		int j = m_DynamicState.m_NumLights;
		if (type != LIGHT_NONE)
		{
			while ( --j >= 0 )
			{
				if (m_DynamicState.m_LightType[j] <= type)
					break;

				// shift...
				m_DynamicState.m_LightType[j+1] = m_DynamicState.m_LightType[j];
				index[j+1] = index[j];
			}
			++j;

			m_DynamicState.m_LightType[j] = type;
			index[j] = i;

			++m_DynamicState.m_NumLights;
		}
	}
}

//-----------------------------------------------------------------------------
// Vertex Shader lighting
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitVertexShaderLighting()
{
	// If nothing changed, then don't bother. Otherwise, reload...
	int i;
	for ( i = 0; i < MAX_NUM_LIGHTS; ++i )
	{
		if (VertexShaderLightingChanged(i) || VertexShaderLightingEnableChanged(i))
			break;
	}

	// Yeah baby
	if ( i == MAX_NUM_LIGHTS )
		return;

	// First, gotta sort the lights by their type
	int lightIndex[MAX_NUM_LIGHTS];
	memset( lightIndex, 0, sizeof( lightIndex ) );
	SortLights( lightIndex );

	// Clear the lighting enable flags
	for ( i = 0; i < MAX_NUM_LIGHTS; ++i )
	{
		m_DynamicState.m_LightEnableChanged[i] &= ~STATE_CHANGED_VERTEX_SHADER;
		m_DynamicState.m_LightChanged[i] &= ~STATE_CHANGED_VERTEX_SHADER;
	}

	bool bAtLeastDX90 = g_pHardwareConfig->GetDXSupportLevel() >= 90; 

	// Set the lighting state
	for ( i = 0; i < m_DynamicState.m_NumLights; ++i )
	{
		D3DLIGHT& light = m_DynamicState.m_Lights[lightIndex[i]];

		Vector4D lightState[5];

		// The first one is the light color (and light type code on DX9)
		float w = (light.Type == D3DLIGHT_DIRECTIONAL) && bAtLeastDX90 ? 1.0f : 0.0f;
		lightState[0].Init( light.Diffuse.r, light.Diffuse.g, light.Diffuse.b, w);

		// The next constant holds the light direction (and light type code on DX9)
		w = (light.Type == D3DLIGHT_SPOT) && bAtLeastDX90 ? 1.0f : 0.0f;
		lightState[1].Init( light.Direction.x, light.Direction.y, light.Direction.z, w );

		// The next constant holds the light position
		lightState[2].Init( light.Position.x, light.Position.y, light.Position.z, 1.0f );

		// The next constant holds exponent, stopdot, stopdot2, 1 / (stopdot - stopdot2)
		if (light.Type == D3DLIGHT_SPOT)
		{
			float stopdot = cos( light.Theta * 0.5f );
			float stopdot2 = cos( light.Phi * 0.5f );
			float oodot = (stopdot > stopdot2) ? 1.0f / (stopdot - stopdot2) : 0.0f;
			lightState[3].Init( light.Falloff, stopdot, stopdot2, oodot );
		}
		else
		{
			lightState[3].Init( 0, 1, 1, 1 );
		}

		// The last constant holds attenuation0, attenuation1, attenuation2
		lightState[4].Init( light.Attenuation0, light.Attenuation1, light.Attenuation2, 0.0f );

		// Set the state
		SetVertexShaderConstant( VERTEX_SHADER_LIGHTS + i * 5, lightState[0].Base(), 5 );
	}

	if ( g_pHardwareConfig->NumIntegerVertexShaderConstants() > 0 && g_pHardwareConfig->NumBooleanVertexShaderConstants() > 0 )
	{
		// Vertex Shader loop counter for number of lights (Only the .x component is used by our shaders) 
		// .x is the iteration count, .y is the initial value and .z is the increment step 
		int nLoopControl[4] = {m_DynamicState.m_NumLights, 0, 1, 0};
		SetIntegerVertexShaderConstant( 0, nLoopControl, 1 );

		// Enable lights using vertex shader static flow control
		int nLightEnable[VERTEX_SHADER_LIGHT_ENABLE_BOOL_CONST_COUNT] = {0, 0, 0, 0};
		for ( i = 0; i < m_DynamicState.m_NumLights; ++i )
		{
			nLightEnable[i] = 1;
		}

		SetBooleanVertexShaderConstant( VERTEX_SHADER_LIGHT_ENABLE_BOOL_CONST, nLightEnable, VERTEX_SHADER_LIGHT_ENABLE_BOOL_CONST_COUNT );
	}
}

//-----------------------------------------------------------------------------
// Set the pixel shader constants for lights
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPixelShaderLighting( int pshReg )
{
#ifndef NDEBUG
	char const *materialName = m_pMaterial->GetName();
#endif

	// First, gotta sort the lights by their type
	int lightIndex[MAX_NUM_LIGHTS];
	SortLights( lightIndex );

	// Offset to create a point light from directional
	const float fFarAway = 10000.0f;

	// Total pixel shader lighting state for four lights
	Vector4D lightState[6];
	for ( int i = 0; i < 6; i++ )
		lightState[i].Init();

	int nNumLights = m_DynamicState.m_NumLights;
	if ( nNumLights > 0  )
	{
		D3DLIGHT *light = &m_DynamicState.m_Lights[lightIndex[0]];
		lightState[0].Init( light->Diffuse.r, light->Diffuse.g, light->Diffuse.b, 0.0f );

		if ( light->Type == D3DLIGHT_DIRECTIONAL )
		{
			Vector vDir(light->Direction.x, light->Direction.y, light->Direction.z );
			Vector vPos = m_DynamicState.m_vLightingOrigin - vDir * fFarAway;
			lightState[1].Init( vPos.x,  vPos.y,  vPos.z, 0.0f );
		}
		else
		{
			lightState[1].Init( light->Position.x,  light->Position.y,  light->Position.z, 0.0f );
		}

		if ( nNumLights > 1 )															// At least two lights
		{
			light = &m_DynamicState.m_Lights[lightIndex[1]];
			lightState[2].Init( light->Diffuse.r, light->Diffuse.g, light->Diffuse.b, 0.0f );

			if ( light->Type == D3DLIGHT_DIRECTIONAL )
			{
				Vector vDir(light->Direction.x, light->Direction.y, light->Direction.z );
				Vector vPos = m_DynamicState.m_vLightingOrigin - vDir * fFarAway;
				lightState[3].Init( vPos.x,  vPos.y,  vPos.z, 0.0f );
			}
			else
			{
				lightState[3].Init( light->Position.x,  light->Position.y,  light->Position.z, 0.0f );
			}

			if ( nNumLights > 2 )														// At least three lights
			{
				light = &m_DynamicState.m_Lights[lightIndex[2]];
				lightState[4].Init( light->Diffuse.r,   light->Diffuse.g,   light->Diffuse.b, 0.0f );

				if ( light->Type == D3DLIGHT_DIRECTIONAL )
				{
					Vector vDir(light->Direction.x, light->Direction.y, light->Direction.z );
					Vector vPos = m_DynamicState.m_vLightingOrigin - vDir * fFarAway;
					lightState[5].Init( vPos.x,  vPos.y,  vPos.z, 0.0f );
				}
				else
				{
					lightState[5].Init( light->Position.x,  light->Position.y,  light->Position.z, 0.0f );
				}

				if ( nNumLights > 3 )													// At least four lights (our current max)
				{
					light = &m_DynamicState.m_Lights[lightIndex[3]];		// Spread 4th light's constants across w components
					lightState[0].w = light->Diffuse.r;
					lightState[1].w = light->Diffuse.g;
					lightState[2].w = light->Diffuse.b;

					if ( light->Type == D3DLIGHT_DIRECTIONAL )
					{
						Vector vDir(light->Direction.x, light->Direction.y, light->Direction.z );
						Vector vPos = m_DynamicState.m_vLightingOrigin - vDir * fFarAway;
						lightState[3].w = vPos.x;
						lightState[4].w = vPos.y;
						lightState[5].w = vPos.z;
					}
					else
					{
						lightState[3].w = light->Position.x;
						lightState[4].w = light->Position.y;
						lightState[5].w = light->Position.z;
					}
				}
			}
		}
	}
	SetPixelShaderConstant( pshReg, lightState[0].Base(), 6 );
}

//-----------------------------------------------------------------------------
// Fixed function lighting
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitFixedFunctionLighting()
{	
	if ( IsX360() )
	{
		return;
	}

	// Commit each light
	for (int i = 0; i < g_pHardwareConfig->MaxNumLights(); ++i)
	{
		// Change light enable
		if ( FixedFunctionLightingEnableChanged( i ) )
		{
			LightEnable( i, m_DynamicState.m_LightEnable[i] );

			// Clear change flag
			m_DynamicState.m_LightEnableChanged[i] &= ~STATE_CHANGED_FIXED_FUNCTION;
		}

		// Change lighting state...
		if ( m_DynamicState.m_LightEnable[i] )
		{
			if ( FixedFunctionLightingChanged( i ) )
			{
				// Store off the "correct" falloff...
				D3DLIGHT& light = m_DynamicState.m_Lights[i];

				float falloff = light.Falloff;

				SetLight( i, &light );

				// Clear change flag
				m_DynamicState.m_LightChanged[i] &= ~STATE_CHANGED_FIXED_FUNCTION;

				// restore the correct falloff
				light.Falloff = falloff;
			}
		}
	}
}


//-----------------------------------------------------------------------------
// Commits user clip planes
//-----------------------------------------------------------------------------
D3DXMATRIX& CShaderAPIDx8::GetUserClipTransform( )
{
	if ( !m_DynamicState.m_bUserClipTransformOverride )
		return GetTransform(MATERIAL_VIEW);

	return m_DynamicState.m_UserClipTransform;
}


//-----------------------------------------------------------------------------
// Commits user clip planes
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitUserClipPlanes( bool bUsingFixedFunction )
{
	// We need to transform the clip planes, specified in world space,
	// to be in projection space.. To transform the plane, we must transform
	// the intercept and then transform the normal.

	if( bUsingFixedFunction != m_DynamicState.m_UserClipLastUpdatedUsingFixedFunction )
	{
		//fixed function clip planes are in world space, vertex shader clip planes are in clip space, so we need to update every clip plane whenever there's a flip
		m_DynamicState.m_UserClipPlaneChanged = (1 << g_pHardwareConfig->MaxUserClipPlanes()) - 1;
		m_DynamicState.m_UserClipLastUpdatedUsingFixedFunction = bUsingFixedFunction;
	}

	D3DXMATRIX worldToProjectionInvTrans;
#ifndef _DEBUG
	if( m_DynamicState.m_UserClipPlaneChanged & m_DynamicState.m_UserClipPlaneEnabled & ((1 << g_pHardwareConfig->MaxUserClipPlanes()) - 1) )
#endif
	{
		//we're going to need the transformation matrix at least once this call
		if( bUsingFixedFunction )
		{
			if( m_DynamicState.m_bUserClipTransformOverride )
			{
				//D3DXMatrixIdentity( &worldToProjectionInvTrans ); //TODO: Test user clip transforms with this
				//Since GetUserClipTransform() returns the view matrix if a user supplied transform doesn't exist, the general solution to this should be to transform the user transform by the inverse view matrix
				//Since we don't know if the user clip is invertable, we'll premultiply by inverse view and cross our fingers that it's right more often than wrong
				D3DXMATRIX viewInverse = GetTransform( MATERIAL_VIEW );
				D3DXMatrixInverse(&viewInverse, NULL, &viewInverse);
				worldToProjectionInvTrans = viewInverse * GetUserClipTransform(); //taking a cue from the multiplication below, multiplication goes left into right
				
				D3DXMatrixInverse(&worldToProjectionInvTrans, NULL, &worldToProjectionInvTrans);
				D3DXMatrixTranspose(&worldToProjectionInvTrans, &worldToProjectionInvTrans);
			}
			else
			{
				D3DXMatrixIdentity( &worldToProjectionInvTrans );
			}
		}
		else
		{
			worldToProjectionInvTrans = GetUserClipTransform( ) * GetTransform( MATERIAL_PROJECTION );
			D3DXMatrixInverse(&worldToProjectionInvTrans, NULL, &worldToProjectionInvTrans);
			D3DXMatrixTranspose(&worldToProjectionInvTrans, &worldToProjectionInvTrans);
		}
	}

	for (int i = 0; i < g_pHardwareConfig->MaxUserClipPlanes(); ++i)
	{
		// Don't bother with the plane if it's not enabled
		if ( (m_DynamicState.m_UserClipPlaneEnabled & (1 << i)) == 0 )
			continue;

		// Don't bother if it didn't change...
		if ( (m_DynamicState.m_UserClipPlaneChanged & (1 << i)) == 0 )
		{
#ifdef _DEBUG
			//verify that the plane has not actually changed
			D3DXPLANE clipPlaneProj;
			D3DXPlaneTransform( &clipPlaneProj, &m_DynamicState.m_UserClipPlaneWorld[i], &worldToProjectionInvTrans );
			Assert ( clipPlaneProj == m_DynamicState.m_UserClipPlaneProj[i] );
#endif
			continue;
		}

		m_DynamicState.m_UserClipPlaneChanged &= ~(1 << i);		

		D3DXPLANE clipPlaneProj;
		D3DXPlaneTransform( &clipPlaneProj, &m_DynamicState.m_UserClipPlaneWorld[i], &worldToProjectionInvTrans );

		if ( clipPlaneProj != m_DynamicState.m_UserClipPlaneProj[i] )
		{
			Dx9Device()->SetClipPlane( i, (float*)clipPlaneProj );
			m_DynamicState.m_UserClipPlaneProj[i] = clipPlaneProj;
		}
	}
}


//-----------------------------------------------------------------------------
// Need to handle fog mode on a per-pass basis
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPerPassFogMode( bool bUsingVertexAndPixelShaders )
{
	if ( IsX360() )
	{
		// FF fog not applicable on 360
		return;
	}

	D3DFOGMODE dxFogMode = D3DFOG_NONE;
	if ( m_DynamicState.m_FogEnable )
	{
		dxFogMode = bUsingVertexAndPixelShaders ? D3DFOG_NONE : D3DFOG_LINEAR;
	}

	// Set fog mode if it's different than before.
	if( m_DynamicState.m_FogMode != dxFogMode )
	{
		SetRenderStateConstMacro( this, D3DRS_FOGVERTEXMODE, dxFogMode );
		m_DynamicState.m_FogMode = dxFogMode;
	}
}

//-----------------------------------------------------------------------------
// Handle Xbox GPU/DX API fixups necessary before actual draw.
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPerPassXboxFixups()
{
#if defined( _X360 )
	// send updated shader constants to gpu
	WriteShaderConstantsToGPU();

	// sRGB write state may have changed after RT set, have to re-set correct RT
	SetRenderTargetInternalXbox( m_hCachedRenderTarget );
#endif
}

//-----------------------------------------------------------------------------
// These states can change between each pass
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitPerPassStateChanges( StateSnapshot_t id )
{
	if ( IsX360() || UsesVertexAndPixelShaders(id) )
	{
		CommitPerPassVertexShaderTransforms();
		CommitPerPassFogMode( true );
		CommitPerPassXboxFixups();
		CallCommitFuncs( COMMIT_PER_PASS, false );
	}
	else if ( IsPC() )
	{
		CommitPerPassFixedFunctionTransforms();
		CommitPerPassFogMode( false );
		CallCommitFuncs( COMMIT_PER_PASS, true );
	}
	else
	{
		Assert( 0 );
	}
}


//-----------------------------------------------------------------------------
// Commits transforms and lighting
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CommitStateChanges()
{
	VPROF("CShaderAPIDx8::CommitStateChanges");
	CommitFastClipPlane();

	bool bUsingFixedFunction = !IsX360() && m_pMaterial && !UsesVertexShader( m_pMaterial->GetVertexFormat() );

	// xboxissue - cannot support ff pipeline
	Assert ( IsPC() || ( IsX360() && !bUsingFixedFunction ) );

	if ( IsX360() || !bUsingFixedFunction )
	{
		CommitVertexShaderTransforms();

		if ( m_pMaterial && m_pMaterial->IsVertexLit() )
		{
			CommitVertexShaderLighting();
		}
	}
	else if ( IsPC() )
	{
		CommitFixedFunctionTransforms();

		if ( m_pMaterial && ( m_pMaterial->IsVertexLit() || m_pMaterial->NeedsFixedFunctionFlashlight() ) )
		{
			CommitFixedFunctionLighting();
		}
	}
	else
	{
		Assert( 0 );
	}

	if ( m_DynamicState.m_UserClipPlaneEnabled )
	{
		CommitUserClipPlanes( bUsingFixedFunction );
	}

	CallCommitFuncs( COMMIT_PER_DRAW, bUsingFixedFunction );
}

//-----------------------------------------------------------------------------
// Commits viewports
//-----------------------------------------------------------------------------
static void CommitSetViewports( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce )
{
	bool bChanged = bForce || memcmp( &desiredState.m_Viewport, &currentState.m_Viewport, sizeof(D3DVIEWPORT9) );

	// The width + height can be zero at startup sometimes.
	if ( bChanged && ( desiredState.m_Viewport.Width != 0 ) && ( desiredState.m_Viewport.Height != 0 ) )
	{
		if( ReverseDepthOnX360() ) //reverse depth on 360 for better perf through hierarchical z
		{
			D3DVIEWPORT9 reverseDepthViewport;
			reverseDepthViewport = desiredState.m_Viewport;
			reverseDepthViewport.MinZ = 1.0f - desiredState.m_Viewport.MinZ;
			reverseDepthViewport.MaxZ = 1.0f - desiredState.m_Viewport.MaxZ;
			Dx9Device()->SetViewport( &reverseDepthViewport );
		}
		else
		{
			Dx9Device()->SetViewport( &desiredState.m_Viewport );
		}
		memcpy( &currentState.m_Viewport, &desiredState.m_Viewport, sizeof( D3DVIEWPORT9 ) );
	}
}


void CShaderAPIDx8::SetViewports( int nCount, const ShaderViewport_t* pViewports )
{
	Assert( nCount == 1 && pViewports[0].m_nVersion == SHADER_VIEWPORT_VERSION );
	if ( nCount != 1 )
		return;

	LOCK_SHADERAPI();

	D3DVIEWPORT9 viewport;
	viewport.X = pViewports[0].m_nTopLeftX;
	viewport.Y = pViewports[0].m_nTopLeftY;
	viewport.Width = pViewports[0].m_nWidth;
	viewport.Height = pViewports[0].m_nHeight;
	viewport.MinZ = pViewports[0].m_flMinZ; 
	viewport.MaxZ = pViewports[0].m_flMaxZ;

	// Clamp the viewport to the current render target...
	if ( !m_UsingTextureRenderTarget )
	{
		// Clamp to both the back buffer and the window, if it is resizing
		int nMaxWidth = 0, nMaxHeight = 0;
		GetBackBufferDimensions( nMaxWidth, nMaxHeight );
		if ( IsPC() && m_IsResizing )
		{
			RECT viewRect;
#if !defined( DX_TO_GL_ABSTRACTION )
			GetClientRect( ( HWND )m_ViewHWnd, &viewRect );
#else
			toglGetClientRect( (VD3DHWND)m_ViewHWnd, &viewRect );
#endif
			m_nWindowWidth = viewRect.right - viewRect.left;
			m_nWindowHeight = viewRect.bottom - viewRect.top;
			nMaxWidth = min( m_nWindowWidth, nMaxWidth );
			nMaxHeight = min( m_nWindowHeight, nMaxHeight );
		}

		// Dimensions can freak out on app exit, so at least make sure the viewport is positive
		if ( (viewport.Width > (unsigned int)nMaxWidth ) && (nMaxWidth > 0) )
		{
			viewport.Width = nMaxWidth;
		}

		// Dimensions can freak out on app exit, so at least make sure the viewport is positive
		if ( ( viewport.Height > (unsigned int)nMaxHeight ) && (nMaxHeight > 0) )
		{
			viewport.Height = nMaxHeight;
		}
	}
	else
	{
		if ( viewport.Width > (unsigned int)m_ViewportMaxWidth )
		{
			viewport.Width = m_ViewportMaxWidth;
		}
		if ( viewport.Height > (unsigned int)m_ViewportMaxHeight )
		{
			viewport.Height = m_ViewportMaxHeight;
		}
	}

	// FIXME: Once we extract buffered primitives out, we can directly fill in desired state
	// and avoid the memcmp and copy
	if ( memcmp( &m_DesiredState.m_Viewport, &viewport, sizeof(D3DVIEWPORT9) ) )
	{
		if ( !IsDeactivated() )
		{
			// State changed... need to flush the dynamic buffer
			FlushBufferedPrimitives();
		}

		memcpy( &m_DesiredState.m_Viewport, &viewport, sizeof(D3DVIEWPORT9) );
	}

	ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_ALWAYS, CommitSetViewports );
}


//-----------------------------------------------------------------------------
// Gets the current viewport size
//-----------------------------------------------------------------------------
int CShaderAPIDx8::GetViewports( ShaderViewport_t* pViewports, int nMax ) const
{
	if ( !pViewports || nMax == 0 )
		return 1;

	LOCK_SHADERAPI();

	pViewports[0].m_nTopLeftX = m_DesiredState.m_Viewport.X;
	pViewports[0].m_nTopLeftY = m_DesiredState.m_Viewport.Y;
	pViewports[0].m_nWidth = m_DesiredState.m_Viewport.Width;
	pViewports[0].m_nHeight = m_DesiredState.m_Viewport.Height;
	pViewports[0].m_flMinZ = m_DesiredState.m_Viewport.MinZ;
	pViewports[0].m_flMaxZ = m_DesiredState.m_Viewport.MaxZ;
	return 1;
}


//-----------------------------------------------------------------------------
// Flushes buffered primitives
//-----------------------------------------------------------------------------
void CShaderAPIDx8::FlushBufferedPrimitives( )
{
	if ( ShaderUtil() )
	{
		if ( !ShaderUtil()->OnFlushBufferedPrimitives() )
		{
			return;
		}
	}
	FlushBufferedPrimitivesInternal();
}

void CShaderAPIDx8::FlushBufferedPrimitivesInternal( )
{
	LOCK_SHADERAPI();
	// This shouldn't happen in the inner rendering loop!
	Assert( m_pRenderMesh == 0 );

	// NOTE: We've gotta store off the matrix mode because
	// it'll get reset by the default state application caused by the flush
	int tempStack = m_CurrStack;
	D3DTRANSFORMSTATETYPE tempMatrixMode = m_MatrixMode;

	MeshMgr()->Flush();

	m_CurrStack = tempStack;
	m_MatrixMode = tempMatrixMode;
}


//-----------------------------------------------------------------------------
// Flush the hardware
//-----------------------------------------------------------------------------
void CShaderAPIDx8::FlushHardware( )
{
	LOCK_SHADERAPI();
	FlushBufferedPrimitives();

	Dx9Device()->EndScene();

	DiscardVertexBuffers();

	Dx9Device()->BeginScene();
	
	ForceHardwareSync();
}


	
//-----------------------------------------------------------------------------
// Deal with device lost (alt-tab)
//-----------------------------------------------------------------------------
void CShaderAPIDx8::HandleDeviceLost()
{
	if ( IsX360() )
	{
		return;
	}

	LOCK_SHADERAPI();

	if ( !IsActive() )
		return;

	// need to flush the dynamic buffer
	FlushBufferedPrimitives();

	if ( !IsDeactivated() )
	{
		Dx9Device()->EndScene();
	}

	CheckDeviceLost( m_bOtherAppInitializing );

	if ( !IsDeactivated() )
	{
		Dx9Device()->BeginScene();
	}
}

//-----------------------------------------------------------------------------
// Buffer clear	color
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ClearColor3ub( unsigned char r, unsigned char g, unsigned char b )
{
	LOCK_SHADERAPI();
	float a = 255;//(r * 0.30f + g * 0.59f + b * 0.11f) / MAX_HDR_OVERBRIGHT;

	// GR - need to force alpha to black for HDR
	m_DynamicState.m_ClearColor = D3DCOLOR_ARGB((unsigned char)a,r,g,b);
}

void CShaderAPIDx8::ClearColor4ub( unsigned char r, unsigned char g, unsigned char b, unsigned char a )
{
	LOCK_SHADERAPI();
	m_DynamicState.m_ClearColor = D3DCOLOR_ARGB(a,r,g,b);
}

// Converts the clear color to be appropriate for HDR
D3DCOLOR CShaderAPIDx8::GetActualClearColor( D3DCOLOR clearColor )
{
	bool bConvert = !IsX360() && m_TransitionTable.CurrentState().m_bLinearColorSpaceFrameBufferEnable;

#if defined( _X360 )
	// The PC disables SRGBWrite when clearing so that the clear color won't get gamma converted
	// The 360 cannot disable that state, and thus compensates for the sRGB conversion
	// the desired result is the clear color written to the RT as-is
	if ( clearColor & D3DCOLOR_ARGB( 0, 255, 255, 255 ) )
	{
		IDirect3DSurface *pRTSurface = NULL;
		Dx9Device()->GetRenderTarget( 0, &pRTSurface );
		if ( pRTSurface )
		{
			D3DSURFACE_DESC desc;
			HRESULT hr = pRTSurface->GetDesc( &desc );
			if ( !FAILED( hr ) && IS_D3DFORMAT_SRGB( desc.Format ) )
			{
				bConvert = true;
			}
			pRTSurface->Release();
		}
	}
#endif

	if ( bConvert )
	{
		// HDRFIXME: need to make sure this works this way.
		// HDRFIXME: Is there a helper function that'll do this easier?
		// convert clearColor from gamma to linear since our frame buffer is linear.
		Vector vecGammaColor;
		vecGammaColor.x = ( 1.0f / 255.0f ) * ( ( clearColor >> 16 ) & 0xff );
		vecGammaColor.y = ( 1.0f / 255.0f ) * ( ( clearColor >> 8 ) & 0xff );
		vecGammaColor.z = ( 1.0f / 255.0f ) * ( clearColor & 0xff );
		Vector vecLinearColor;
		vecLinearColor.x = GammaToLinear( vecGammaColor.x );
		vecLinearColor.y = GammaToLinear( vecGammaColor.y );
		vecLinearColor.z = GammaToLinear( vecGammaColor.z );
		clearColor &= D3DCOLOR_RGBA( 0, 0, 0, 255 );
		clearColor |= D3DCOLOR_COLORVALUE( vecLinearColor.x, vecLinearColor.y, vecLinearColor.z, 0.0f );
	}

	return clearColor;
}


//-----------------------------------------------------------------------------
// Clear buffers while obeying stencil
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ClearBuffersObeyStencil( bool bClearColor, bool bClearDepth )
{
	//copy the clear color bool into the clear alpha bool
	ClearBuffersObeyStencilEx( bClearColor, bClearColor, bClearDepth );
}

void CShaderAPIDx8::ClearBuffersObeyStencilEx( bool bClearColor, bool bClearAlpha, bool bClearDepth )
{
	LOCK_SHADERAPI();

	if ( !bClearColor && !bClearAlpha && !bClearDepth )
		return;

	FlushBufferedPrimitives();

	// Before clearing can happen, user clip planes must be disabled
	SetRenderState( D3DRS_CLIPPLANEENABLE, 0 );

	D3DCOLOR clearColor = GetActualClearColor( m_DynamicState.m_ClearColor );

	unsigned char r, g, b, a;
	b = clearColor& 0xFF;
	g = ( clearColor >> 8 ) & 0xFF;
	r = ( clearColor >> 16 ) & 0xFF;
	a = ( clearColor >> 24 ) & 0xFF;

	ShaderUtil()->DrawClearBufferQuad( r, g, b, a, bClearColor, bClearAlpha, bClearDepth );

	// Reset user clip plane state
	FlushBufferedPrimitives();
	SetRenderState( D3DRS_CLIPPLANEENABLE, m_DynamicState.m_UserClipPlaneEnabled );
}

//-------------------------------------------------------------------------
//Perform stencil operations to every pixel on the screen
//-------------------------------------------------------------------------
void CShaderAPIDx8::PerformFullScreenStencilOperation( void )
{
	LOCK_SHADERAPI();

	FlushBufferedPrimitives();

	// We'll be drawing a large quad in altered worldspace, user clip planes must be disabled
	SetRenderStateConstMacro( this, D3DRS_CLIPPLANEENABLE, 0 );

	ShaderUtil()->DrawClearBufferQuad( 0, 0, 0, 0, false, false, false );

	// Reset user clip plane state
	FlushBufferedPrimitives();
	SetRenderStateConstMacro( this, D3DRS_CLIPPLANEENABLE, m_DynamicState.m_UserClipPlaneEnabled );
}


//-----------------------------------------------------------------------------
// Buffer clear
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ClearBuffers( bool bClearColor, bool bClearDepth, bool bClearStencil, int renderTargetWidth, int renderTargetHeight )
{
	LOCK_SHADERAPI();
	if ( ShaderUtil()->GetConfig().m_bSuppressRendering )
		return;

	if ( IsDeactivated() )
		return;

	// State changed... need to flush the dynamic buffer
	FlushBufferedPrimitives();
	CallCommitFuncs( COMMIT_PER_DRAW, true );

	float depth = (ShaderUtil()->GetConfig().bReverseDepth ^ ReverseDepthOnX360()) ? 0.0f : 1.0f;
	DWORD mask = 0;

	if ( bClearColor )
	{
		mask |= D3DCLEAR_TARGET;
	}

	if ( bClearDepth )
	{
		mask |= D3DCLEAR_ZBUFFER;
	}
	
	if ( bClearStencil && m_bUsingStencil )
	{
		mask |= D3DCLEAR_STENCIL;
	}

	// Only clear the current view... right!??!
	D3DRECT clear;
	clear.x1 = m_DesiredState.m_Viewport.X;
	clear.y1 = m_DesiredState.m_Viewport.Y;
	clear.x2 = clear.x1 + m_DesiredState.m_Viewport.Width;
	clear.y2 = clear.y1 + m_DesiredState.m_Viewport.Height;

	// SRGBWrite is disabled when clearing so that the clear color won't get gamma converted
	bool bSRGBWriteEnable = false;
	if ( !IsX360() && bClearColor && m_TransitionTable.CurrentShadowState() )
	{
		bSRGBWriteEnable = m_TransitionTable.CurrentShadowState()->m_SRGBWriteEnable;
	}
	
#if !defined( _X360 )
	if ( bSRGBWriteEnable )
	{
		// This path used to be !IsPosix(), but this makes no sense and causes the clear color to differ in D3D9 vs. GL.
		Dx9Device()->SetRenderState( D3DRS_SRGBWRITEENABLE, 0 );
	}
#endif
	
	D3DCOLOR clearColor = GetActualClearColor( m_DynamicState.m_ClearColor );

	if ( mask != 0 )
	{
		bool bRenderTargetMatchesViewport = 
			( renderTargetWidth == -1 && renderTargetHeight == -1 ) ||
			( m_DesiredState.m_Viewport.Width == -1 && m_DesiredState.m_Viewport.Height == -1 ) ||
			( renderTargetWidth ==  ( int )m_DesiredState.m_Viewport.Width &&
			  renderTargetHeight == ( int )m_DesiredState.m_Viewport.Height );

		if ( bRenderTargetMatchesViewport )
		{
			RECORD_COMMAND( DX8_CLEAR, 6 );
			RECORD_INT( 0 );
			RECORD_STRUCT( &clear, sizeof(clear) );
			RECORD_INT( mask );
			RECORD_INT( clearColor );
			RECORD_FLOAT( depth );
			RECORD_INT( 0 );

			Dx9Device()->Clear( 0, NULL, mask, clearColor, depth, 0L );	
		}
		else
		{
			RECORD_COMMAND( DX8_CLEAR, 6 );
			RECORD_INT( 0 );
			RECORD_STRUCT( &clear, sizeof(clear) );
			RECORD_INT( mask );
			RECORD_INT( clearColor );
			RECORD_FLOAT( depth );
			RECORD_INT( 0 );

			Dx9Device()->Clear( 1, &clear, mask, clearColor, depth, 0L );	
		}
	}

	// Restore state
	if ( bSRGBWriteEnable )
	{
		// sRGBWriteEnable shouldn't be true if we have no shadow state. . . Assert just in case.
		Assert( m_TransitionTable.CurrentShadowState() );
		m_TransitionTable.ApplySRGBWriteEnable( *m_TransitionTable.CurrentShadowState() );
	}
}


//-----------------------------------------------------------------------------
// Bind
//-----------------------------------------------------------------------------
void CShaderAPIDx8::BindVertexShader( VertexShaderHandle_t hVertexShader )
{
	ShaderManager()->BindVertexShader( hVertexShader );
}

void CShaderAPIDx8::BindGeometryShader( GeometryShaderHandle_t hGeometryShader )
{
	Assert( hGeometryShader == GEOMETRY_SHADER_HANDLE_INVALID );
}

void CShaderAPIDx8::BindPixelShader( PixelShaderHandle_t hPixelShader )
{
	ShaderManager()->BindPixelShader( hPixelShader );
}


//-----------------------------------------------------------------------------
// Returns a copy of the front buffer
//-----------------------------------------------------------------------------
IDirect3DSurface* CShaderAPIDx8::GetFrontBufferImage( ImageFormat& format )
{
#if !defined( _X360 )
	// need to flush the dynamic buffer	and make sure the entire image is there
	FlushBufferedPrimitives();

	int w, h;
	GetBackBufferDimensions( w, h );

	HRESULT hr;
	IDirect3DSurface *pFullScreenSurfaceBits = 0;
	hr = Dx9Device()->CreateOffscreenPlainSurface( w, h, 
		D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &pFullScreenSurfaceBits, NULL );
	if (FAILED(hr))
		return 0;

	hr = Dx9Device()->GetFrontBufferData( 0, pFullScreenSurfaceBits );
	if (FAILED(hr))
		return 0;

	int windowWidth, windowHeight;
	GetWindowSize( windowWidth, windowHeight );
	
	IDirect3DSurface *pSurfaceBits = 0;
	hr = Dx9Device()->CreateOffscreenPlainSurface( windowWidth, windowHeight,
		D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &pSurfaceBits, NULL );
	Assert( hr == D3D_OK );
	
	POINT pnt;
	pnt.x = pnt.y = 0;
#ifdef _WIN32
	BOOL result = ClientToScreen( ( HWND )m_hWnd, &pnt );
#else
	BOOL result = ClientToScreen( (VD3DHWND)m_hWnd, &pnt );
#endif
	Assert( result );

	RECT srcRect;
	srcRect.left = pnt.x;
	srcRect.top = pnt.y;
	srcRect.right = pnt.x + windowWidth;
	srcRect.bottom = pnt.y + windowHeight;

	POINT dstPnt;
	dstPnt.x = dstPnt.y = 0;
	
	D3DLOCKED_RECT lockedSrcRect;
	hr = pFullScreenSurfaceBits->LockRect( &lockedSrcRect, &srcRect, D3DLOCK_READONLY );
	Assert( hr == D3D_OK );
	
	D3DLOCKED_RECT lockedDstRect;
	hr = pSurfaceBits->LockRect( &lockedDstRect, NULL, 0 );
	Assert( hr == D3D_OK );

	int i;
	for( i = 0; i < windowHeight; i++ )
	{
		memcpy( ( unsigned char * )lockedDstRect.pBits + ( i * lockedDstRect.Pitch ), 
			    ( unsigned char * )lockedSrcRect.pBits + ( i * lockedSrcRect.Pitch ),
				windowWidth * 4 ); // hack . .  what if this is a different format?
	}
	hr = pSurfaceBits->UnlockRect();
	Assert( hr == D3D_OK );
	hr = pFullScreenSurfaceBits->UnlockRect();
	Assert( hr == D3D_OK );

	pFullScreenSurfaceBits->Release();

	format = ImageLoader::D3DFormatToImageFormat( D3DFMT_A8R8G8B8 );
	return pSurfaceBits;
#else
	Assert( 0 );
	return NULL;
#endif
}


//-----------------------------------------------------------------------------
// Lets the shader know about the full-screen texture so it can 
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetFullScreenTextureHandle( ShaderAPITextureHandle_t h )
{
	LOCK_SHADERAPI();
	m_hFullScreenTexture = h;
}


//-----------------------------------------------------------------------------
// Lets the shader know about the full-screen texture so it can 
//-----------------------------------------------------------------------------
void CShaderAPIDx8::SetLinearToGammaConversionTextures( ShaderAPITextureHandle_t hSRGBWriteEnabledTexture, ShaderAPITextureHandle_t hIdentityTexture )
{
	LOCK_SHADERAPI();

	m_hLinearToGammaTableTexture = hSRGBWriteEnabledTexture;
	m_hLinearToGammaTableIdentityTexture = hIdentityTexture;
}

//-----------------------------------------------------------------------------
// Returns a copy of the back buffer
//-----------------------------------------------------------------------------
IDirect3DSurface* CShaderAPIDx8::GetBackBufferImageHDR( Rect_t *pSrcRect, Rect_t *pDstRect, ImageFormat& format )
{
#if !defined( _X360 )
	HRESULT hr;
	IDirect3DSurface *pSurfaceBits = 0;
	IDirect3DSurface *pTmpSurface = NULL;

	// Get the back buffer
	IDirect3DSurface* pBackBuffer;
	hr = Dx9Device()->GetRenderTarget( 0, &pBackBuffer );
	if (FAILED(hr))
		return 0;

	// Find about its size and format
	D3DSURFACE_DESC desc;
	D3DTEXTUREFILTERTYPE filter;
	
	hr = pBackBuffer->GetDesc( &desc );
	if (FAILED(hr))
		goto CleanUp;

	filter = ((pDstRect->width != pSrcRect->width) || (pDstRect->height != pSrcRect->height)) ? D3DTEXF_LINEAR : D3DTEXF_NONE;

	if ( ( pDstRect->x + pDstRect->width <= SMALL_BACK_BUFFER_SURFACE_WIDTH ) && 
		 ( pDstRect->y + pDstRect->height <= SMALL_BACK_BUFFER_SURFACE_HEIGHT ) )
	{
		if (!m_pSmallBackBufferFP16TempSurface)
		{
			hr = Dx9Device()->CreateRenderTarget( 
				SMALL_BACK_BUFFER_SURFACE_WIDTH, SMALL_BACK_BUFFER_SURFACE_HEIGHT, 
				desc.Format, D3DMULTISAMPLE_NONE, 0, TRUE, &m_pSmallBackBufferFP16TempSurface,
				NULL );
		}
		pTmpSurface = m_pSmallBackBufferFP16TempSurface;
#if POSIX
		pTmpSurface->AddRef( 0, "CShaderAPIDx8::GetBackBufferImageHDR public addref");
#else
		pTmpSurface->AddRef();
#endif

		desc.Width = SMALL_BACK_BUFFER_SURFACE_WIDTH;
		desc.Height = SMALL_BACK_BUFFER_SURFACE_HEIGHT;

		RECT srcRect, destRect;
		RectToRECT( pSrcRect, srcRect );
		RectToRECT( pDstRect, destRect );
		hr = Dx9Device()->StretchRect( pBackBuffer, &srcRect, pTmpSurface, &destRect, filter );
		if ( FAILED(hr) )
			goto CleanUp;
	}
	else
	{
		// Normally we would only have to create a separate render target here and StretchBlt to it first
		// if AA was enabled, but certain machines/drivers get reboots if we do GetRenderTargetData 
		// straight off the backbuffer.
		hr = Dx9Device()->CreateRenderTarget( desc.Width, desc.Height, desc.Format,
			D3DMULTISAMPLE_NONE, 0, TRUE, &pTmpSurface, NULL );
		if ( FAILED(hr) )
			goto CleanUp;

		hr = Dx9Device()->StretchRect( pBackBuffer, NULL, pTmpSurface, NULL, filter );
		if ( FAILED(hr) )
			goto CleanUp;
	}

	// Create a buffer the same size and format
	hr = Dx9Device()->CreateOffscreenPlainSurface( desc.Width, desc.Height, 
		desc.Format, D3DPOOL_SYSTEMMEM, &pSurfaceBits, NULL );
	if (FAILED(hr))
		goto CleanUp;

	// Blit from the back buffer to our scratch buffer
	hr = Dx9Device()->GetRenderTargetData( pTmpSurface ? pTmpSurface : pBackBuffer, pSurfaceBits );
	if (FAILED(hr))
		goto CleanUp2;

	format = ImageLoader::D3DFormatToImageFormat(desc.Format);
	if ( pTmpSurface )
	{
		pTmpSurface->Release();
	}
	pBackBuffer->Release();
	return pSurfaceBits;

CleanUp2:
	pSurfaceBits->Release();

CleanUp:
	if ( pTmpSurface )
	{
		pTmpSurface->Release();
	}

	pBackBuffer->Release();
#else
	Assert( 0 );
#endif
	return 0;
}


//-----------------------------------------------------------------------------
// Returns a copy of the back buffer
//-----------------------------------------------------------------------------
IDirect3DSurface* CShaderAPIDx8::GetBackBufferImage( Rect_t *pSrcRect, Rect_t *pDstRect, ImageFormat& format )
{
#if !defined( _X360 )
	if ( !m_pBackBufferSurface || ( m_hFullScreenTexture == INVALID_SHADERAPI_TEXTURE_HANDLE ) )
		return NULL;

	HRESULT hr;
	D3DSURFACE_DESC desc;

	FlushBufferedPrimitives();

	// Get the current render target
	IDirect3DSurface* pRenderTarget;
	hr = Dx9Device()->GetRenderTarget( 0, &pRenderTarget );
	if (FAILED(hr))
		return 0;

	// Find about its size and format
	hr = pRenderTarget->GetDesc( &desc );

	if ( desc.Format == D3DFMT_A16B16G16R16F || desc.Format == D3DFMT_A32B32G32R32F )
		return GetBackBufferImageHDR( pSrcRect, pDstRect, format );

	IDirect3DSurface *pSurfaceBits = NULL;
	IDirect3DSurface *pTmpSurface = NULL;
	int nRenderTargetRefCount;
	nRenderTargetRefCount = 0;

	if ( (desc.MultiSampleType == D3DMULTISAMPLE_NONE) && (pRenderTarget != m_pBackBufferSurface) && 
		 (pSrcRect->width == pDstRect->width) && (pSrcRect->height == pDstRect->height) )
	{
		// Don't bother to blit through the full-screen texture if we don't
		// have to stretch, we're not coming from the backbuffer, and we don't have to do AA resolve
		pTmpSurface = pRenderTarget;
#if POSIX
		pTmpSurface->AddRef( 0, "CShaderAPIDx8::GetBackBufferImage public addref");
#else
		pTmpSurface->AddRef();
#endif
	}
	else
	{
		Texture_t *pTex = &GetTexture( m_hFullScreenTexture );
		IDirect3DTexture* pFullScreenTexture = (IDirect3DTexture*)pTex->GetTexture();

		D3DTEXTUREFILTERTYPE filter = ((pDstRect->width != pSrcRect->width) || (pDstRect->height != pSrcRect->height)) ? D3DTEXF_LINEAR : D3DTEXF_NONE;

		hr = pFullScreenTexture->GetSurfaceLevel( 0, &pTmpSurface ); 
		if ( FAILED(hr) )
			goto CleanUp;

		if ( pTmpSurface == pRenderTarget )
		{
			Warning( "Can't blit from full-sized offscreen buffer!\n" );
			goto CleanUp;
		}

		RECT srcRect, destRect;
		srcRect.left = pSrcRect->x; srcRect.right = pSrcRect->x + pSrcRect->width;
		srcRect.top = pSrcRect->y; srcRect.bottom = pSrcRect->y + pSrcRect->height;
		srcRect.left = clamp( srcRect.left, 0, (int)desc.Width );
		srcRect.right = clamp( srcRect.right, 0, (int)desc.Width );
		srcRect.top = clamp( srcRect.top, 0, (int)desc.Height );
		srcRect.bottom = clamp( srcRect.bottom, 0, (int)desc.Height );

		destRect.left = pDstRect->x ; destRect.right = pDstRect->x + pDstRect->width;
		destRect.top = pDstRect->y; destRect.bottom = pDstRect->y + pDstRect->height;
		destRect.left = clamp( destRect.left, 0, (int)desc.Width );
		destRect.right = clamp( destRect.right, 0, (int)desc.Width );
		destRect.top = clamp( destRect.top, 0, (int)desc.Height );
		destRect.bottom = clamp( destRect.bottom, 0, (int)desc.Height );

		hr = Dx9Device()->StretchRect( pRenderTarget, &srcRect, pTmpSurface, &destRect, filter );
		if ( FAILED(hr) )
		{
			AssertOnce( "Error resizing pixels!\n" );
			goto CleanUp;
		}
	}

	D3DSURFACE_DESC tmpDesc;
	hr = pTmpSurface->GetDesc( &tmpDesc );
	Assert( !FAILED(hr) );

	// Create a buffer the same size and format
	hr = Dx9Device()->CreateOffscreenPlainSurface( tmpDesc.Width, tmpDesc.Height, 
		desc.Format, D3DPOOL_SYSTEMMEM, &pSurfaceBits, NULL );
	if ( FAILED(hr) )
	{
		AssertOnce( "Error creating offscreen surface!\n" );
		goto CleanUp;
	}

	// Blit from the back buffer to our scratch buffer
	hr = Dx9Device()->GetRenderTargetData( pTmpSurface, pSurfaceBits );
	if ( FAILED(hr) )
	{
		AssertOnce( "Error copying bits into the offscreen surface!\n" );
		goto CleanUp;
	}

	format = ImageLoader::D3DFormatToImageFormat( desc.Format );

	pTmpSurface->Release();
#ifdef _DEBUG
	nRenderTargetRefCount = 
#endif
		pRenderTarget->Release();
	AssertOnce( nRenderTargetRefCount == 1 );
	return pSurfaceBits;

CleanUp:
	if ( pSurfaceBits )
	{
		pSurfaceBits->Release();
	}

	if ( pTmpSurface )
	{
		pTmpSurface->Release();
	}
#else
	Assert( 0 );
#endif

	return 0;
}


//-----------------------------------------------------------------------------
// Copy bits from a host-memory surface
//-----------------------------------------------------------------------------
void CShaderAPIDx8::CopyBitsFromHostSurface( IDirect3DSurface* pSurfaceBits, 
	const Rect_t &dstRect, unsigned char *pData, ImageFormat srcFormat, ImageFormat dstFormat, int nDstStride )
{
	// Copy out the bits...
	RECT rect;
	rect.left   = dstRect.x; 
	rect.right  = dstRect.x + dstRect.width; 
	rect.top    = dstRect.y; 
	rect.bottom = dstRect.y + dstRect.height;

	D3DLOCKED_RECT lockedRect;
	HRESULT hr;
	int flags = D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK;

	tmZone( TELEMETRY_LEVEL1, TMZF_NONE, "%s", __FUNCTION__ );

	hr = pSurfaceBits->LockRect( &lockedRect, &rect, flags );
	if ( !FAILED( hr ) )
	{
		unsigned char *pImage = (unsigned char *)lockedRect.pBits;
		ShaderUtil()->ConvertImageFormat( (unsigned char *)pImage, srcFormat,
			pData, dstFormat, dstRect.width, dstRect.height, lockedRect.Pitch, nDstStride );

		hr = pSurfaceBits->UnlockRect( );
	}
}


//-----------------------------------------------------------------------------
// Reads from the current read buffer  + stretches
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReadPixels( Rect_t *pSrcRect, Rect_t *pDstRect, unsigned char *pData, ImageFormat dstFormat, int nDstStride )
{
	LOCK_SHADERAPI();
	Assert( pDstRect );
	
	if ( IsPC() || !IsX360() )
	{
		Rect_t srcRect;
		if ( !pSrcRect )
		{
			srcRect.x = srcRect.y = 0;
			srcRect.width = m_nWindowWidth;
			srcRect.height = m_nWindowHeight;
			pSrcRect = &srcRect;
		}

		ImageFormat format;
		IDirect3DSurface* pSurfaceBits = GetBackBufferImage( pSrcRect, pDstRect, format );
		if ( pSurfaceBits )
		{
			CopyBitsFromHostSurface( pSurfaceBits, *pDstRect, pData, format, dstFormat, nDstStride );
		
			// Release the temporary surface
			pSurfaceBits->Release();
		}
	}
	else
	{
#if defined( _X360 )
		// 360 requires material system to handle due to RT complexities
		ShaderUtil()->ReadBackBuffer( pSrcRect, pDstRect, pData, dstFormat, nDstStride );
#endif
	}
}


//-----------------------------------------------------------------------------
// Reads from the current read buffer
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReadPixels( int x, int y, int width, int height, unsigned char *pData, ImageFormat dstFormat )
{
	Rect_t rect;
	rect.x = x;
	rect.y = y;
	rect.width = width;
	rect.height = height;

	if ( IsPC() || !IsX360() )
	{
		ImageFormat format;
		IDirect3DSurface* pSurfaceBits = GetBackBufferImage( &rect, &rect, format );
		if (pSurfaceBits)
		{
			CopyBitsFromHostSurface( pSurfaceBits, rect, pData, format, dstFormat, 0 );
		
			// Release the temporary surface
			pSurfaceBits->Release();
		}
	}
	else
	{
#if defined( _X360 )
		// 360 requires material system to handle due to RT complexities
		ShaderUtil()->ReadBackBuffer( &rect, &rect, pData, dstFormat, 0 );
#endif
	}
}


//-----------------------------------------------------------------------------
// Binds a particular material to render with
//-----------------------------------------------------------------------------
void CShaderAPIDx8::Bind( IMaterial* pMaterial )
{
	LOCK_SHADERAPI();
	IMaterialInternal* pMatInt = static_cast<IMaterialInternal*>( pMaterial );

	bool bMaterialChanged;
	if ( m_pMaterial && pMatInt && m_pMaterial->InMaterialPage() && pMatInt->InMaterialPage() )
	{
		bMaterialChanged = ( m_pMaterial->GetMaterialPage() != pMatInt->GetMaterialPage() );
	}
	else
	{
		bMaterialChanged = ( m_pMaterial != pMatInt ) || ( m_pMaterial && m_pMaterial->InMaterialPage() ) || ( pMatInt && pMatInt->InMaterialPage() );
	}

	if ( bMaterialChanged )
	{
		FlushBufferedPrimitives();
#ifdef RECORDING
		RECORD_DEBUG_STRING( ( char * )pMaterial->GetName() );
		IShader *pShader = pMatInt->GetShader();
		if( pShader && pShader->GetName() )
		{
			RECORD_DEBUG_STRING( pShader->GetName() );
		}
		else
		{
			RECORD_DEBUG_STRING( "<NULL SHADER>" );
		}
#endif
		m_pMaterial = pMatInt;

#if ( defined( PIX_INSTRUMENTATION ) || defined( NVPERFHUD ) )
		PIXifyName( s_pPIXMaterialName, sizeof( s_pPIXMaterialName ), m_pMaterial->GetName() );
#endif
	}
}

// Get the currently bound material
IMaterialInternal* CShaderAPIDx8::GetBoundMaterial()
{
	return m_pMaterial;
}


//-----------------------------------------------------------------------------
// Binds a standard texture
//-----------------------------------------------------------------------------
void CShaderAPIDx8::BindStandardTexture( Sampler_t sampler, StandardTextureId_t id )
{
	if ( m_StdTextureHandles[id] != INVALID_SHADERAPI_TEXTURE_HANDLE )
	{
		BindTexture( sampler, m_StdTextureHandles[id] );
	}
	else
	{
		ShaderUtil()->BindStandardTexture( sampler, id );
	}
}

void CShaderAPIDx8::BindStandardVertexTexture( VertexTextureSampler_t sampler, StandardTextureId_t id )
{
	ShaderUtil()->BindStandardVertexTexture( sampler, id );
}

void CShaderAPIDx8::GetStandardTextureDimensions( int *pWidth, int *pHeight, StandardTextureId_t id )
{
	ShaderUtil()->GetStandardTextureDimensions( pWidth, pHeight, id );
}


//-----------------------------------------------------------------------------
// Gets the lightmap dimensions
//-----------------------------------------------------------------------------
void CShaderAPIDx8::GetLightmapDimensions( int *w, int *h )
{
	ShaderUtil()->GetLightmapDimensions( w, h );
}


//-----------------------------------------------------------------------------
// Selection mode methods
//-----------------------------------------------------------------------------
int CShaderAPIDx8::SelectionMode( bool selectionMode )
{
	LOCK_SHADERAPI();
	int numHits = m_NumHits;
	if (m_InSelectionMode)
	{
		WriteHitRecord();
	}
	m_InSelectionMode = selectionMode;
	m_pCurrSelectionRecord = m_pSelectionBuffer;
	m_NumHits = 0;
	return numHits;
}

bool CShaderAPIDx8::IsInSelectionMode() const
{
	return m_InSelectionMode;
}

void CShaderAPIDx8::SelectionBuffer( unsigned int* pBuffer, int size )
{
	LOCK_SHADERAPI();
	Assert( !m_InSelectionMode );
	Assert( pBuffer && size );
	m_pSelectionBufferEnd = pBuffer + size;
	m_pSelectionBuffer = pBuffer;
	m_pCurrSelectionRecord = pBuffer;
}

void CShaderAPIDx8::ClearSelectionNames( )
{
	LOCK_SHADERAPI();
	if (m_InSelectionMode)
	{
		WriteHitRecord();
	}
	m_SelectionNames.Clear();
}

void CShaderAPIDx8::LoadSelectionName( int name )
{
	LOCK_SHADERAPI();
	if (m_InSelectionMode)
	{
		WriteHitRecord();
		Assert( m_SelectionNames.Count() > 0 );
		m_SelectionNames.Top() = name;
	}
}

void CShaderAPIDx8::PushSelectionName( int name )
{
	LOCK_SHADERAPI();
	if (m_InSelectionMode)
	{
		WriteHitRecord();
		m_SelectionNames.Push(name);
	}
}

void CShaderAPIDx8::PopSelectionName()
{
	LOCK_SHADERAPI();
	if (m_InSelectionMode)
	{
		WriteHitRecord();
		m_SelectionNames.Pop();
	}
}

void CShaderAPIDx8::WriteHitRecord( )
{
	FlushBufferedPrimitives();

	if (m_SelectionNames.Count() && (m_SelectionMinZ != FLT_MAX))
	{
		Assert( m_pCurrSelectionRecord + m_SelectionNames.Count() + 3 <	m_pSelectionBufferEnd );
		*m_pCurrSelectionRecord++ = m_SelectionNames.Count();
		// NOTE: because of rounding, "(uint32)(float)UINT32_MAX" yields zero(!), hence the use of doubles.
		// [ ALSO: As of Nov 2011, VS2010 exhibits a debug build code-gen bug if we cast the result to int32 instead of uint32 ]
	    *m_pCurrSelectionRecord++ = (uint32)( 0.5 + m_SelectionMinZ*(double)((uint32)~0) );
	    *m_pCurrSelectionRecord++ = (uint32)( 0.5 + m_SelectionMaxZ*(double)((uint32)~0) );
		for (int i = 0; i < m_SelectionNames.Count(); ++i)
		{
			*m_pCurrSelectionRecord++ = m_SelectionNames[i];
		}

		++m_NumHits;
	}

	m_SelectionMinZ = FLT_MAX;
	m_SelectionMaxZ = FLT_MIN;
}

// We hit somefin in selection mode
void CShaderAPIDx8::RegisterSelectionHit( float minz, float maxz )
{
	if (minz < 0)
		minz = 0;
	if (maxz > 1)
		maxz = 1;
	if (m_SelectionMinZ > minz)
		m_SelectionMinZ = minz;
	if (m_SelectionMaxZ < maxz)
		m_SelectionMaxZ = maxz;
}

int CShaderAPIDx8::GetCurrentNumBones( void ) const
{
	return m_DynamicState.m_NumBones;
}

bool CShaderAPIDx8::IsHWMorphingEnabled( ) const
{
	return m_DynamicState.m_bHWMorphingEnabled;
}


//-----------------------------------------------------------------------------
// Inserts the lighting block into the code
//-----------------------------------------------------------------------------

// If you change the number of lighting combinations, change this enum
enum
{
	DX8_LIGHTING_COMBINATION_COUNT = 22,
	DX9_LIGHTING_COMBINATION_COUNT = 35
};

#define MAX_LIGHTS 4


// NOTE: These should match g_lightType* in vsh_prep.pl!
static int g_DX8LightCombinations[][4] = 
{
	// static		ambient				local1				local2

	// This is a special case for no lighting at all.
	{ LIGHT_NONE,	LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },			

	// This is a special case so that we don't have to do the ambient cube
	// when we only have static lighting
	{ LIGHT_STATIC,	LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },			

	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_NONE,			LIGHT_NONE },			
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_NONE },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_NONE },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_SPOT },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_POINT, },			
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_DIRECTIONAL, },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_POINT, },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_DIRECTIONAL, },
	{ LIGHT_NONE,	LIGHT_AMBIENTCUBE,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL, },

	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_NONE,			LIGHT_NONE },			
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_NONE },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_NONE },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_SPOT },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_POINT, },			
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_SPOT,			LIGHT_DIRECTIONAL, },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_POINT, },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_POINT,		LIGHT_DIRECTIONAL, },
	{ LIGHT_STATIC,	LIGHT_AMBIENTCUBE,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL, }
};

// NOTE: These should match g_lightType* in vsh_prep.pl!
// They also correspond to the parallel g_LocalLightTypeXArray[] arrays in common_vs_fxc.h
static int g_DX9LightCombinations[][MAX_LIGHTS] = 
{
	// local0				local1				local2				local3
	{ LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },			// Zero lights		[ Combo 0]

	{ LIGHT_SPOT,			LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },			// One light		[ Combo 1]
	{ LIGHT_POINT,			LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },
	{ LIGHT_DIRECTIONAL,	LIGHT_NONE,			LIGHT_NONE,			LIGHT_NONE },

	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_NONE,			LIGHT_NONE },			// Two lights		[ Combo 4]
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_NONE,			LIGHT_NONE },
	{ LIGHT_SPOT,			LIGHT_DIRECTIONAL,	LIGHT_NONE,			LIGHT_NONE },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_NONE,			LIGHT_NONE },
	{ LIGHT_POINT,			LIGHT_DIRECTIONAL,	LIGHT_NONE,			LIGHT_NONE },
	{ LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_NONE,			LIGHT_NONE },

	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_NONE },			// Three lights		[ Combo 10]
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_POINT,		LIGHT_NONE },
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_NONE },
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_SPOT,			LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_NONE },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_POINT,			LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_NONE },
	{ LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_NONE },

	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_SPOT },			// Four lights		[ Combo 20]
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_POINT },
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_DIRECTIONAL },
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_POINT,		LIGHT_POINT },
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_POINT,		LIGHT_DIRECTIONAL },
	{ LIGHT_SPOT,			LIGHT_SPOT,			LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL },
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_POINT },
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_DIRECTIONAL },
	{ LIGHT_SPOT,			LIGHT_POINT,		LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL },
	{ LIGHT_SPOT,			LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_POINT },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_POINT,		LIGHT_DIRECTIONAL },
	{ LIGHT_POINT,			LIGHT_POINT,		LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL },
	{ LIGHT_POINT,			LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL },
	{ LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL,	LIGHT_DIRECTIONAL }
};

// This is just for getting light combos for DX8
// For DX9, use GetDX9LightState()
// It is up to the shader cpp files to use the right method
int CShaderAPIDx8::GetCurrentLightCombo( void ) const
{
	Assert( g_pHardwareConfig->Caps().m_nDXSupportLevel <= 81 );

	Assert( m_DynamicState.m_NumLights <= 2 );

	COMPILE_TIME_ASSERT( DX8_LIGHTING_COMBINATION_COUNT == 
		sizeof( g_DX8LightCombinations ) / sizeof( g_DX8LightCombinations[0] ) );
	
	// hack . . do this a cheaper way.
	bool bUseAmbientCube;
	if( m_DynamicState.m_AmbientLightCube[0][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[0][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[0][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][2] == 0.0f )
	{
		bUseAmbientCube = false;
	}
	else
	{
		bUseAmbientCube = true;
	}

	Assert( m_pRenderMesh );

	const VertexShaderLightTypes_t *pLightType = m_DynamicState.m_LightType;

	if( m_DynamicState.m_NumLights == 0 && !bUseAmbientCube )
	{
		if( m_pRenderMesh->HasColorMesh() )
			return 1;		// special case for static lighting only
		else
			return 0;		// special case for no lighting at all.
	}
	
	int i;
	// hack - skip the first two for now since we don't know if the ambient cube is needed or not.
	for( i = 2; i < DX9_LIGHTING_COMBINATION_COUNT; ++i )
	{
		int j;
		for( j = 0; j < m_DynamicState.m_NumLights; ++j )
		{
			if( pLightType[j] != g_DX8LightCombinations[i][j+2] )
				break;
		}
		if( j == m_DynamicState.m_NumLights )
		{
			while( j < 2 )
			{
				if (g_DX8LightCombinations[i][j+2] != LIGHT_NONE)
					break;
				++j;
			}
			if( j == 2 )
			{
				if( m_pRenderMesh->HasColorMesh() )
				{
					return i + 10;
				}
				else
				{
					return i;
				}
			}
		}
	}

	// should never get here!
	Assert( 0 );
	return 0;
}

void CShaderAPIDx8::GetDX9LightState( LightState_t *state ) const
{
	// hack . . do this a cheaper way.
	if( m_DynamicState.m_AmbientLightCube[0][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[0][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[0][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[1][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[2][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[3][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[4][2] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][0] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][1] == 0.0f && 
		m_DynamicState.m_AmbientLightCube[5][2] == 0.0f )
	{
		state->m_bAmbientLight = false;
	}
	else
	{
		state->m_bAmbientLight = true;
	}

	Assert( m_pRenderMesh );
	Assert( m_DynamicState.m_NumLights <= 4 );

	if ( g_pHardwareConfig->SupportsPixelShaders_2_b() )
	{
		Assert( m_DynamicState.m_NumLights <= MAX_LIGHTS );		// 2b hardware gets four lights
	}
	else
	{
		Assert( m_DynamicState.m_NumLights <= (MAX_LIGHTS-2) );	// 2.0 hardware gets two less
	}

#ifdef OSX
	state->m_nNumLights = MIN(MAX_NUM_LIGHTS,m_DynamicState.m_NumLights);
#else
	state->m_nNumLights = m_DynamicState.m_NumLights;
#endif	
	
	state->m_nNumLights = m_DynamicState.m_NumLights;
	state->m_bStaticLightVertex = m_pRenderMesh->HasColorMesh();
	state->m_bStaticLightTexel = false; // For now
}

MaterialFogMode_t CShaderAPIDx8::GetCurrentFogType( void ) const
{
	return m_DynamicState.m_SceneFog;
}

void CShaderAPIDx8::RecordString( const char *pStr )
{
	RECORD_STRING( pStr );
}

void CShaderAPIDx8::EvictManagedResourcesInternal()
{
	if ( IsX360() )
		return;

	if ( !ThreadOwnsDevice() || !ThreadInMainThread() )
	{
		ShaderUtil()->OnThreadEvent( SHADER_THREAD_EVICT_RESOURCES );
		return;
	}
	if ( mat_debugalttab.GetBool() )
	{
		Warning( "mat_debugalttab: CShaderAPIDx8::EvictManagedResourcesInternal\n" );
	}

#if !defined( _X360 )
	if ( Dx9Device() )
	{
		Dx9Device()->EvictManagedResources();
	}
#endif
}

void CShaderAPIDx8::EvictManagedResources( void )
{
	if ( IsX360() )
	{
		return;
	}

	LOCK_SHADERAPI();
	Assert(ThreadOwnsDevice());
	// Tell other material system applications to release resources
	SendIPCMessage( EVICT_MESSAGE );
	EvictManagedResourcesInternal();
}

bool CShaderAPIDx8::IsDebugTextureListFresh( int numFramesAllowed /* = 1 */ )
{
	return ( m_nDebugDataExportFrame <= m_CurrentFrame ) && ( m_nDebugDataExportFrame >= m_CurrentFrame - numFramesAllowed );
}

bool CShaderAPIDx8::SetDebugTextureRendering( bool bEnable )
{
	bool bVal = m_bDebugTexturesRendering;
	m_bDebugTexturesRendering = bEnable;
	return bVal;
}

void CShaderAPIDx8::EnableDebugTextureList( bool bEnable )
{
	m_bEnableDebugTextureList = bEnable;
}

void CShaderAPIDx8::EnableGetAllTextures( bool bEnable )
{
	m_bDebugGetAllTextures = bEnable;
}

KeyValues* CShaderAPIDx8::GetDebugTextureList()
{
	return m_pDebugTextureList;
}

int CShaderAPIDx8::GetTextureMemoryUsed( TextureMemoryType eTextureMemory )
{
	switch ( eTextureMemory )
	{
	case MEMORY_BOUND_LAST_FRAME:
		return m_nTextureMemoryUsedLastFrame;
	case MEMORY_TOTAL_LOADED:
		return m_nTextureMemoryUsedTotal;
	case MEMORY_ESTIMATE_PICMIP_1:
		return m_nTextureMemoryUsedPicMip1;
	case MEMORY_ESTIMATE_PICMIP_2:
		return m_nTextureMemoryUsedPicMip2;
	default:
		return 0;
	}
}


// Allocate and delete query objects.
ShaderAPIOcclusionQuery_t CShaderAPIDx8::CreateOcclusionQueryObject( void )
{
	// don't allow this on <80 because it falls back to wireframe in that case
	if( m_DeviceSupportsCreateQuery == 0 || g_pHardwareConfig->Caps().m_nDXSupportLevel < 80 )
		return INVALID_SHADERAPI_OCCLUSION_QUERY_HANDLE;

	// While we're deactivated, m_OcclusionQueryObjects just holds NULL pointers.
	// Create a dummy one here and let ReacquireResources create the actual D3D object.
	if ( IsDeactivated() )
		return INVALID_SHADERAPI_OCCLUSION_QUERY_HANDLE;

	IDirect3DQuery9 *pQuery = NULL;
	HRESULT hr = Dx9Device()->CreateQuery( D3DQUERYTYPE_OCCLUSION, &pQuery );
	return ( hr == D3D_OK ) ? (ShaderAPIOcclusionQuery_t)pQuery : INVALID_SHADERAPI_OCCLUSION_QUERY_HANDLE;
}

void CShaderAPIDx8::DestroyOcclusionQueryObject( ShaderAPIOcclusionQuery_t handle )
{
	IDirect3DQuery9 *pQuery = (IDirect3DQuery9 *)handle;

	int nRetVal = pQuery->Release();
	Assert( nRetVal == 0 );
}

// Bracket drawing with begin and end so that we can get counts next frame.
void CShaderAPIDx8::BeginOcclusionQueryDrawing( ShaderAPIOcclusionQuery_t handle )
{
	IDirect3DQuery9 *pQuery = (IDirect3DQuery9 *)handle;

	HRESULT hResult = pQuery->Issue( D3DISSUE_BEGIN );
	Assert( hResult == D3D_OK );
}

void CShaderAPIDx8::EndOcclusionQueryDrawing( ShaderAPIOcclusionQuery_t handle )
{
	IDirect3DQuery9 *pQuery = (IDirect3DQuery9 *)handle;

	HRESULT hResult = pQuery->Issue( D3DISSUE_END );
	Assert( hResult == D3D_OK );
}

// Get the number of pixels rendered between begin and end on an earlier frame.
// Calling this in the same frame is a huge perf hit!
int CShaderAPIDx8::OcclusionQuery_GetNumPixelsRendered( ShaderAPIOcclusionQuery_t handle, bool bFlush )
{
	LOCK_SHADERAPI();
	IDirect3DQuery9 *pQuery = (IDirect3DQuery9 *)handle;

	tmZone( TELEMETRY_LEVEL1, TMZF_NONE, "%s", __FUNCTION__ );

	DWORD nPixels;
	HRESULT hResult = pQuery->GetData( &nPixels, sizeof( nPixels ), bFlush ? D3DGETDATA_FLUSH : 0 );

	// This means that the query will not finish and resulted in an error, game should use
	// the previous query's results and not reissue the query
	if ( ( hResult == D3DERR_DEVICELOST ) || ( hResult == D3DERR_NOTAVAILABLE ) )
		return OCCLUSION_QUERY_RESULT_ERROR;

	// This means the query isn't finished yet, game will have to use the previous query's
	// results and not reissue the query; wait for query to finish later.
	if ( hResult == S_FALSE )
		return OCCLUSION_QUERY_RESULT_PENDING;

	// NOTE: This appears to work around a driver bug for ATI on Vista
	if ( nPixels & 0x80000000 )
	{
		nPixels = 0;
	}
	return nPixels;
}


void CShaderAPIDx8::SetPixelShaderFogParams( int reg, ShaderFogMode_t fogMode )
{
	m_DelayedShaderConstants.iPixelShaderFogParams = reg; //save it off in case the ShaderFogMode_t disables fog. We only find out later.
	float fogParams[4];

	if( (GetPixelFogMode() != MATERIAL_FOG_NONE) && (fogMode != SHADER_FOGMODE_DISABLED) )
	{
		float ooFogRange = 1.0f;

		float fStart = m_VertexShaderFogParams[0];
		float fEnd = m_VertexShaderFogParams[1];

		// Check for divide by zero
		if ( fEnd != fStart )
		{
			ooFogRange = 1.0f / ( fEnd - fStart );
		}

		fogParams[0] = fStart * ooFogRange;		// fogStart / ( fogEnd - fogStart )
		fogParams[1] = m_DynamicState.m_FogZ;	// water height
		fogParams[2] = clamp( m_flFogMaxDensity, 0.0f, 1.0f ); // Max fog density
		fogParams[3] = ooFogRange;				// 1 / ( fogEnd - fogStart );

		if (GetPixelFogMode() == MATERIAL_FOG_LINEAR_BELOW_FOG_Z)
		{
			// terms are unused for height fog, forcing 1.0 allows unified PS math
			fogParams[0] = 0.0f;
			fogParams[2] = 1.0f;
		}
	}
	else
	{
		//emulating MATERIAL_FOG_NONE by setting the parameters so that CalcRangeFog() always returns 0. Gets rid of a dynamic combo across the ps2x set.
		fogParams[0] = 0.0f;
		fogParams[1] = m_DynamicState.m_FogZ; // water height
		fogParams[2] = 1.0f; // Max fog density
		fogParams[3] = 0.0f;
	}

	// cFogEndOverFogRange, cFogOne, unused, cOOFogRange
	SetPixelShaderConstant( reg, fogParams, 1 );
}

void CShaderAPIDx8::SetPixelShaderFogParams( int reg )
{
	SetPixelShaderFogParams( reg, m_TransitionTable.CurrentShadowState()->m_FogMode );
}

void CShaderAPIDx8::SetFlashlightState( const FlashlightState_t &state, const VMatrix &worldToTexture )
{
	LOCK_SHADERAPI();
	SetFlashlightStateEx( state, worldToTexture, NULL );
}

void CShaderAPIDx8::SetFlashlightStateEx( const FlashlightState_t &state, const VMatrix &worldToTexture, ITexture *pFlashlightDepthTexture )
{
	LOCK_SHADERAPI();
	// fixme: do a test here.
	FlushBufferedPrimitives();
	m_FlashlightState = state;
	m_FlashlightWorldToTexture = worldToTexture;
	m_pFlashlightDepthTexture = pFlashlightDepthTexture;

	if ( !g_pHardwareConfig->SupportsPixelShaders_2_b() )
	{
		m_FlashlightState.m_bEnableShadows = false;
		m_pFlashlightDepthTexture = NULL;
	}
}

const FlashlightState_t &CShaderAPIDx8::GetFlashlightState( VMatrix &worldToTexture ) const
{
	worldToTexture = m_FlashlightWorldToTexture;
	return m_FlashlightState;
}

const FlashlightState_t &CShaderAPIDx8::GetFlashlightStateEx( VMatrix &worldToTexture, ITexture **ppFlashlightDepthTexture ) const
{
	worldToTexture = m_FlashlightWorldToTexture;
	*ppFlashlightDepthTexture = m_pFlashlightDepthTexture;
	return m_FlashlightState;
}

bool CShaderAPIDx8::SupportsMSAAMode( int nMSAAMode )
{
	if ( IsX360() )
	{
		return false;
	}

	return ( D3D_OK == D3D()->CheckDeviceMultiSampleType( m_DisplayAdapter, m_DeviceType, 
														   m_PresentParameters.BackBufferFormat,
														   m_PresentParameters.Windowed,
														   ComputeMultisampleType( nMSAAMode ), NULL ) );
}

bool CShaderAPIDx8::SupportsCSAAMode( int nNumSamples, int nQualityLevel )
{
#ifdef DX_TO_GL_ABSTRACTION
	// GL_NV_framebuffer_multisample_coverage
	return false;
#endif

	// Only nVidia does this kind of AA
	if ( g_pHardwareConfig->Caps().m_VendorID != VENDORID_NVIDIA )
		return false;

	DWORD dwQualityLevels = 0;
	HRESULT hr = D3D()->CheckDeviceMultiSampleType( m_DisplayAdapter, m_DeviceType, 
													 m_PresentParameters.BackBufferFormat,
													 m_PresentParameters.Windowed,
													 ComputeMultisampleType( nNumSamples ), &dwQualityLevels );

	return ( ( D3D_OK == hr ) && ( (int) dwQualityLevels >= nQualityLevel ) );
}

bool CShaderAPIDx8::SupportsShadowDepthTextures( void )
{
	return g_pHardwareConfig->Caps().m_bSupportsShadowDepthTextures;
}

bool CShaderAPIDx8::SupportsBorderColor( void ) const
{
	return g_pHardwareConfig->Caps().m_bSupportsBorderColor;
}

bool CShaderAPIDx8::SupportsFetch4( void )
{
	return IsPC() && g_pHardwareConfig->Caps().m_bSupportsFetch4;
}

ImageFormat CShaderAPIDx8::GetShadowDepthTextureFormat( void )
{
	return g_pHardwareConfig->Caps().m_ShadowDepthTextureFormat;
}

ImageFormat CShaderAPIDx8::GetNullTextureFormat( void )
{
	return g_pHardwareConfig->Caps().m_NullTextureFormat;
}

void CShaderAPIDx8::SetShadowDepthBiasFactors( float fShadowSlopeScaleDepthBias, float fShadowDepthBias )
{
	m_fShadowSlopeScaleDepthBias = fShadowSlopeScaleDepthBias;
	m_fShadowDepthBias = fShadowDepthBias;
}

void CShaderAPIDx8::ClearVertexAndPixelShaderRefCounts()
{
	LOCK_SHADERAPI();
	ShaderManager()->ClearVertexAndPixelShaderRefCounts();
}

void CShaderAPIDx8::PurgeUnusedVertexAndPixelShaders()
{
	LOCK_SHADERAPI();
	ShaderManager()->PurgeUnusedVertexAndPixelShaders();
}

bool CShaderAPIDx8::UsingSoftwareVertexProcessing() const
{
	return g_pHardwareConfig->Caps().m_bSoftwareVertexProcessing;
}

ITexture *CShaderAPIDx8::GetRenderTargetEx( int nRenderTargetID )
{
	return ShaderUtil()->GetRenderTargetEx( nRenderTargetID );
}

float CShaderAPIDx8::GetLightMapScaleFactor( void ) const
{
	switch( HardwareConfig()->GetHDRType() )
	{

		case HDR_TYPE_FLOAT:
			return 1.0;
			break;

		case HDR_TYPE_INTEGER:
			return 16.0;

		case HDR_TYPE_NONE:
		default:
			return GammaToLinearFullRange( 2.0 );	// light map scale
	}
}

void CShaderAPIDx8::SetToneMappingScaleLinear( const Vector &scale )
{
	if ( g_pHardwareConfig->SupportsPixelShaders_2_0() )
	{
		// Flush buffered primitives before changing the tone map scalar!
		FlushBufferedPrimitives();

		Vector scale_to_use = scale;
		m_ToneMappingScale.AsVector3D() = scale_to_use;
		
		bool mode_uses_srgb=false;

		switch( HardwareConfig()->GetHDRType() )
		{
			case HDR_TYPE_NONE:
				m_ToneMappingScale.x = 1.0;										// output scale
				m_ToneMappingScale.z = 1.0;										// reflection map scale
				break;

			case HDR_TYPE_FLOAT:
				m_ToneMappingScale.x = scale_to_use.x;							// output scale
				m_ToneMappingScale.z = 1.0;										// reflection map scale
				break;

			case HDR_TYPE_INTEGER:
				mode_uses_srgb = true;
				m_ToneMappingScale.x = scale_to_use.x;							// output scale
				m_ToneMappingScale.z = 16.0;									// reflection map scale
				break;
		}

		m_ToneMappingScale.y = GetLightMapScaleFactor();	// light map scale

		// w component gets gamma scale
		m_ToneMappingScale.w = LinearToGammaFullRange( m_ToneMappingScale.x );
		SetPixelShaderConstant( TONE_MAPPING_SCALE_PSH_CONSTANT, m_ToneMappingScale.Base() );

		// We have to change the fog color since we tone map directly in the shaders in integer HDR mode.
		if ( HardwareConfig()->GetHDRType() == HDR_TYPE_INTEGER && m_TransitionTable.CurrentShadowState() )
		{
			// Get the shadow state in sync since it depends on SetToneMappingScaleLinear.
			ApplyFogMode( m_TransitionTable.CurrentShadowState()->m_FogMode, mode_uses_srgb, m_TransitionTable.CurrentShadowState()->m_bDisableFogGammaCorrection );
		}
	}
}

const Vector & CShaderAPIDx8::GetToneMappingScaleLinear( void ) const
{
	return m_ToneMappingScale.AsVector3D();
}

void CShaderAPIDx8::EnableLinearColorSpaceFrameBuffer( bool bEnable )
{
	LOCK_SHADERAPI();
	m_TransitionTable.EnableLinearColorSpaceFrameBuffer( bEnable );
}


void CShaderAPIDx8::SetPSNearAndFarZ( int pshReg )
{
	VMatrix m;
	GetMatrix( MATERIAL_PROJECTION, m.m[0] );

	// m[2][2] =  F/(N-F)   (flip sign if RH)
	// m[3][2] = NF/(N-F)

	float vNearFar[4];

	float N =     m[3][2] / m[2][2];
	float F = (m[3][2]*N) / (N + m[3][2]);

	vNearFar[0] = N;
	vNearFar[1] = F;


	SetPixelShaderConstant( pshReg, vNearFar, 1 );
}


void CShaderAPIDx8::SetFloatRenderingParameter( int parm_number, float value )
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( FloatRenderingParameters ))
		FloatRenderingParameters[parm_number] = value;
}

void CShaderAPIDx8::SetIntRenderingParameter( int parm_number, int value )
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( IntRenderingParameters ))
		IntRenderingParameters[parm_number] = value;
}

void CShaderAPIDx8::SetVectorRenderingParameter( int parm_number, Vector const & value )
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( VectorRenderingParameters ))
		VectorRenderingParameters[parm_number] = value;
}

float CShaderAPIDx8::GetFloatRenderingParameter( int parm_number ) const
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( FloatRenderingParameters ))
		return FloatRenderingParameters[parm_number];
	else
		return 0.0;
}

int CShaderAPIDx8::GetIntRenderingParameter( int parm_number ) const
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( IntRenderingParameters ))
		return IntRenderingParameters[parm_number];
	else
		return 0;
}

Vector CShaderAPIDx8::GetVectorRenderingParameter( int parm_number ) const
{
	LOCK_SHADERAPI();
	if ( parm_number < ARRAYSIZE( VectorRenderingParameters ))
		return VectorRenderingParameters[parm_number];
	else
		return Vector( 0, 0, 0 );
}

// stencil entry points
void CShaderAPIDx8::SetStencilEnable( bool onoff )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILENABLE, onoff?TRUE:FALSE, true );
}

void CShaderAPIDx8::SetStencilFailOperation( StencilOperation_t op )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILFAIL, op, true );
}

void CShaderAPIDx8::SetStencilZFailOperation( StencilOperation_t op )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILZFAIL, op, true );
}

void CShaderAPIDx8::SetStencilPassOperation( StencilOperation_t op )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILPASS, op, true );
}

void CShaderAPIDx8::SetStencilCompareFunction( StencilComparisonFunction_t cmpfn )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILFUNC, cmpfn, true );
}

void CShaderAPIDx8::SetStencilReferenceValue( int ref )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILREF, ref, true );
}

void CShaderAPIDx8::SetStencilTestMask( uint32 msk )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILMASK, msk, true );
}

void CShaderAPIDx8::SetStencilWriteMask( uint32 msk )
{
	LOCK_SHADERAPI();
	SetRenderState( D3DRS_STENCILWRITEMASK, msk, true );
}

void CShaderAPIDx8::ClearStencilBufferRectangle(
	int xmin, int ymin, int xmax, int ymax,int value)
{
	LOCK_SHADERAPI();
	D3DRECT clear;
	clear.x1 = xmin;
	clear.y1 = ymin;
	clear.x2 = xmax;
	clear.y2 = ymax;

	Dx9Device()->Clear( 
		1, &clear, D3DCLEAR_STENCIL, 0, 0, value );	
}

int CShaderAPIDx8::CompareSnapshots( StateSnapshot_t snapshot0, StateSnapshot_t snapshot1 )
{
	LOCK_SHADERAPI();
	const ShadowState_t &shadow0 = m_TransitionTable.GetSnapshot(snapshot0);
	const ShadowState_t &shadow1 = m_TransitionTable.GetSnapshot(snapshot1);
	const ShadowShaderState_t &shader0 = m_TransitionTable.GetSnapshotShader(snapshot0);
	const ShadowShaderState_t &shader1 = m_TransitionTable.GetSnapshotShader(snapshot1);

	int dVertex = shader0.m_VertexShader - shader1.m_VertexShader;
	if ( dVertex )
		return dVertex;
	int dVCombo = shader0.m_nStaticVshIndex - shader1.m_nStaticVshIndex;
	if ( dVCombo)
		return dVCombo;

	int dPixel = shader0.m_PixelShader - shader1.m_PixelShader;
	if ( dPixel )
		return dPixel;
	int dPCombo = shader0.m_nStaticPshIndex - shader1.m_nStaticPshIndex;
	if ( dPCombo)
		return dPCombo;

	return snapshot0 - snapshot1;
}

//-----------------------------------------------------------------------------
// X360 TTF support requires XUI state manipulation of d3d.
// Font support lives inside the shaderapi in order to maintain privacy of d3d.
//-----------------------------------------------------------------------------
#if defined( _X360 )
HXUIFONT CShaderAPIDx8::OpenTrueTypeFont( const char *pFontname, int tall, int style )
{
	LOCK_SHADERAPI();

	struct fontTable_t
	{
		const char	*pFontName;
		const char	*pPath;
	};

	// explicit mapping now required, dvd searching to expensive
	static fontTable_t fontToFilename[] = 
	{
		{"tf2",				"tf/resource/tf2.ttf"},
		{"tf2 build",		"tf/resource/tf2build.ttf"},
		{"tf2 professor",	"tf/resource/tf2professor.ttf"},
		{"tf2 secondary",	"tf/resource/tf2secondary.ttf"},
		{"team fortress",	"tf/resource/tf.ttf"},
		{"tfd",				"tf/resource/tfd.ttf"},
		{"tflogo",			"tf/resource/tflogo.ttf"},
		{"hl2ep2",			"ep2/resource/hl2ep2.ttf"},
		{"hl2ep1",			"episodic/resource/hl2ep1.ttf"},
		{"halflife2",		"hl2/resource/halflife2.ttf"},
		{"hl2cross",		"hl2/resource/HL2Crosshairs.ttf"},
		{"courier new",		"platform/vgui/fonts/cour.ttf"},
		{"times new roman",	"platform/vgui/fonts/times.ttf"},
		{"trebuchet ms",	"platform/vgui/fonts/trebuc.ttf"},
		{"verdana",			"platform/vgui/fonts/verdana.ttf"},
		{"tahoma",			"platform/vgui/fonts/tahoma.ttf"},
	};

	// remap typeface to diskname
	const char *pDiskname = NULL;
	for ( int i=0; i<ARRAYSIZE( fontToFilename ); i++ )
	{
		if ( !V_stricmp( pFontname, fontToFilename[i].pFontName ) )
		{
			pDiskname = fontToFilename[i].pPath;
			break;
		}
	}
	if ( !pDiskname )
	{
		// not found
		DevMsg( "True Type Font: '%s' unknown.\n", pFontname );
		return NULL;
	}

	// font will be registered using typeface name
	wchar_t	wchFontname[MAX_PATH];
	Q_UTF8ToUnicode( pFontname, wchFontname, sizeof( wchFontname ) );

	// find font in registered typefaces
	TypefaceDescriptor *pDescriptors = NULL;
	DWORD numTypeFaces = 0;
	HRESULT hr = XuiEnumerateTypefaces( &pDescriptors, &numTypeFaces );
	if ( FAILED( hr ) )
	{
		return NULL;
	}

	bool bRegistered = false;
	for ( DWORD i=0; i<numTypeFaces; i++ )
	{
		if ( !V_wcscmp( pDescriptors->szTypeface, wchFontname ) )
		{
			bRegistered = true;
			break;
		}
	}

	XuiDestroyTypefaceList( pDescriptors, numTypeFaces );

	if ( !bRegistered )
	{
		// unregistered type face, register type face and retry
		// only file based resource locators work
		char filename[MAX_PATH];
		V_snprintf( filename, sizeof( filename ), "file://d:/%s", pDiskname );
		Q_FixSlashes( filename, '/' );

		wchar_t	wchFilename[MAX_PATH];
		Q_UTF8ToUnicode( filename, wchFilename, sizeof( wchFilename ) );

		TypefaceDescriptor desc;
		desc.fBaselineAdjust = 0;
		desc.szFallbackTypeface = NULL;
		desc.szLocator = wchFilename;
		desc.szReserved1 = 0;
		desc.szTypeface = wchFontname;
		hr = XuiRegisterTypeface( &desc, FALSE );
		if ( FAILED( hr ) )
		{
			return NULL;
		}
	}

	// empirically derived factor to achieve desired cell height
	float pointSize = tall * 0.59f;
	HXUIFONT hFont = NULL;
	hr = XuiCreateFont( wchFontname, pointSize, style, 0, &hFont );
	if ( FAILED( hr ) )
	{
		return NULL;
	}

	return hFont;
}
#endif

//-----------------------------------------------------------------------------
// Release TTF
//-----------------------------------------------------------------------------
#if defined( _X360 )
void CShaderAPIDx8::CloseTrueTypeFont( HXUIFONT hFont )
{
	if ( !hFont )
		return;
	LOCK_SHADERAPI();

	XuiReleaseFont( hFont );
}
#endif

//-----------------------------------------------------------------------------
// Get the TTF Metrics
//-----------------------------------------------------------------------------
#if defined( _X360 )
bool CShaderAPIDx8::GetTrueTypeFontMetrics( HXUIFONT hFont, XUIFontMetrics *pFontMetrics, XUICharMetrics charMetrics[256] )
{
	if ( !hFont )
		return false;

	LOCK_SHADERAPI();

	V_memset( charMetrics, 0, 256 * sizeof( XUICharMetrics ) );

	HRESULT hr = XuiGetFontMetrics( hFont, pFontMetrics );
	if ( !FAILED( hr ) )
	{
		// X360 issue: max character width may be too small.
		// Run through each character and fixup
		for ( int i = 1; i < 256; i++ )
		{
			wchar_t wch = i;
			hr = XuiGetCharMetrics( hFont, wch, &charMetrics[i] );
			if ( !FAILED( hr ) )
			{
				float maxWidth = charMetrics[i].fMaxX;
				if ( charMetrics[i].fMinX < 0 )
				{
					maxWidth = charMetrics[i].fMaxX - charMetrics[i].fMinX;
				}
				if ( maxWidth > pFontMetrics->fMaxWidth )
				{
					pFontMetrics->fMaxWidth = maxWidth;
				}
				if ( charMetrics[i].fAdvance > pFontMetrics->fMaxWidth )
				{
					pFontMetrics->fMaxWidth = charMetrics[i].fAdvance;
				}
			}
		}

		// fonts are getting cut off, MaxHeight seems to be misreported smaller
		// take MaxHeight to be the larger of its reported value or (ascent + descent)
		float maxHeight = 0;
		if ( pFontMetrics->fMaxDescent <= 0 )
		{
			// descent is negative for below baseline
			maxHeight = pFontMetrics->fMaxAscent - pFontMetrics->fMaxDescent;
		}
		if ( maxHeight > pFontMetrics->fMaxHeight )
		{
			pFontMetrics->fMaxHeight = maxHeight;
		}
	}

	return ( !FAILED( hr ) );
}
#endif

//-----------------------------------------------------------------------------
// Gets the glyph bits in rgba order. This function PURPOSELY hijacks D3D
// because XUI is involved. It is called at a very specific place in the VGUI
// render frame where its deleterious affects are going to be harmless.
//-----------------------------------------------------------------------------
#if defined( _X360 )
bool CShaderAPIDx8::GetTrueTypeGlyphs( HXUIFONT hFont, int numChars, wchar_t *pWch, int *pOffsetX, int *pOffsetY, int *pWidth, int *pHeight, unsigned char *pRGBA, int *pRGBAOffset )
{
	if ( !hFont )
		return false;

	// Ensure this doesn't talk to D3D at the same time as the loading bar
	AUTO_LOCK_FM( m_nonInteractiveModeMutex );


	LOCK_SHADERAPI();
	bool bSuccess = false;
	IDirect3DSurface *pRTSurface = NULL;
	IDirect3DSurface *pSavedSurface = NULL;
	IDirect3DSurface *pSavedDepthSurface = NULL;
	IDirect3DTexture *pTexture = NULL;
	D3DVIEWPORT9 savedViewport;
    D3DXMATRIX matView;
    D3DXMATRIX matXForm;
	D3DLOCKED_RECT lockedRect;

	// have to reset to default state to rasterize glyph correctly
	// state will get re-established during next mesh draw
	ResetRenderState( false );
	Dx9Device()->SetRenderState( D3DRS_ZENABLE, FALSE );

	Dx9Device()->GetRenderTarget( 0, &pSavedSurface );
	Dx9Device()->GetDepthStencilSurface( &pSavedDepthSurface );
	Dx9Device()->GetViewport( &savedViewport );

	// Figure out the size of surface/texture we need to allocate
	int rtWidth = 0;
	int rtHeight = 0;
	for ( int i = 0; i < numChars; i++ )
	{
		rtWidth += pWidth[i];
		rtHeight = max( rtHeight, pHeight[i] );
	}

	// per resolve() restrictions
	rtWidth = AlignValue( rtWidth, 32 );
	rtHeight = AlignValue( rtHeight, 32 );

	// create a render target to capture the glyph render
	pRTSurface = g_TextureHeap.AllocRenderTargetSurface( rtWidth, rtHeight, D3DFMT_A8R8G8B8 );
	if ( !pRTSurface )
		goto cleanUp;

	Dx9Device()->SetRenderTarget( 0, pRTSurface );
	// Disable depth here otherwise you get a colour/depth multisample mismatch error (in 480p)
	Dx9Device()->SetDepthStencilSurface( NULL );
	Dx9Device()->Clear( 0, NULL, D3DCLEAR_TARGET, 0x00000000, ( ReverseDepthOnX360() ? 0.0 : 1.0f ), 0 );

	// create texture to get glyph render from EDRAM
	HRESULT hr = Dx9Device()->CreateTexture( rtWidth, rtHeight, 1, 0, D3DFMT_A8R8G8B8, 0, &pTexture, NULL );
	if ( FAILED( hr ) )
		goto cleanUp;


	bool bPreviousOwnState = OwnGPUResources( false );
	XuiRenderBegin( m_hDC, 0x00000000 );

	D3DXMatrixIdentity( &matView );
	XuiRenderSetViewTransform( m_hDC, &matView );
	XuiRenderSetTransform( m_hDC, &matView );

	// rasterize the glyph
	XuiSelectFont( m_hDC, hFont );
	XuiSetColorFactor( m_hDC, 0xFFFFFFFF );

	// Draw the characters, stepping across the texture
	int xCursor = 0;
	for ( int i = 0; i < numChars; i++)
	{
		// FIXME: the drawRect params don't make much sense (should use "(xCursor+pWidth[i]), pHeight[i]", but then some characters disappear!)
		XUIRect drawRect = XUIRect( xCursor + pOffsetX[i], pOffsetY[i], rtWidth, rtHeight );
		wchar_t	text[2] = { pWch[i], 0 };
		XuiDrawText( m_hDC, text, XUI_FONT_STYLE_NORMAL|XUI_FONT_STYLE_SINGLE_LINE|XUI_FONT_STYLE_NO_WORDWRAP, 0, &drawRect ); 
		xCursor += pWidth[i];
	}

	XuiRenderEnd( m_hDC );
	OwnGPUResources( bPreviousOwnState );


	// transfer from edram to system
	hr = Dx9Device()->Resolve( 0, NULL, pTexture, NULL, 0, 0, NULL, 0, 0, NULL );
	if ( FAILED( hr ) )
		goto cleanUp;

	hr = pTexture->LockRect( 0, &lockedRect, NULL, 0 );
	if ( FAILED( hr ) )
		goto cleanUp;

	// transfer to linear format, one character at a time
	xCursor = 0;
	for ( int i = 0;i < numChars; i++ )
	{
		int destPitch = pWidth[i]*4;
		unsigned char *pLinear = pRGBA + pRGBAOffset[i];
		RECT copyRect = { xCursor, 0, xCursor + pWidth[i], pHeight[i] };
		xCursor += pWidth[i];
		XGUntileSurface( pLinear, destPitch, NULL, lockedRect.pBits, rtWidth, rtHeight, &copyRect, 4 );

		// convert argb to rgba
		float r, g, b, a;
		for ( int y = 0; y < pHeight[i]; y++ )
		{
			unsigned char *pSrc = (unsigned char*)pLinear + y*destPitch;
			for ( int x = 0; x < pWidth[i]; x++ )
			{
				// undo pre-multiplied alpha since glyph bits will be sourced as a rgba texture
				if ( !pSrc[0] )
					a = 1;
				else
					a = (float)pSrc[0] * 1.0f/255.0f;
				
				r = ((float)pSrc[1] * 1.0f/255.0f)/a * 255.0f;
				if ( r > 255 )
					r = 255;
			
				g = ((float)pSrc[2] * 1.0f/255.0f)/a * 255.0f;
				if ( g > 255 )
					g = 255;

				b = ((float)pSrc[3] * 1.0f/255.0f)/a * 255.0f;
				if ( b > 255 )
					b = 255;

				pSrc[3] = pSrc[0];
				pSrc[2] = b;
				pSrc[1] = g;
				pSrc[0] = r;

				pSrc += 4;
			}
		}
	}

	pTexture->UnlockRect( 0 );

	bSuccess = true;

cleanUp:
	if ( pRTSurface )
	{
		Dx9Device()->SetRenderTarget( 0, pSavedSurface );
		Dx9Device()->SetDepthStencilSurface( pSavedDepthSurface );
		Dx9Device()->SetViewport( &savedViewport );
		pRTSurface->Release();
	}

	if ( pTexture )
		pTexture->Release();

	if ( pSavedSurface )
		pSavedSurface->Release();

	// XUI changed renderstates behind our back, so we need to reset to defaults again to get back in synch:
	ResetRenderState( false );

	return bSuccess;
}
#endif

//-----------------------------------------------------------------------------
// Create a 360 Render Target Surface
//-----------------------------------------------------------------------------
#if defined( _X360 )
ShaderAPITextureHandle_t CShaderAPIDx8::CreateRenderTargetSurface( int width, int height, ImageFormat format, const char *pDebugName, const char *pTextureGroupName )
{
	LOCK_SHADERAPI();
	ShaderAPITextureHandle_t textureHandle = CreateTextureHandle();
	Texture_t *pTexture = &GetTexture( textureHandle );

	pTexture->m_Flags = (Texture_t::IS_ALLOCATED | Texture_t::IS_RENDER_TARGET_SURFACE);
	
	pTexture->m_DebugName = pDebugName;
	pTexture->m_Width = width;
	pTexture->m_Height = height;
	pTexture->m_Depth = 1;
	pTexture->m_NumCopies = 1;
	pTexture->m_CurrentCopy = 0;

	ImageFormat dstImageFormat = FindNearestSupportedFormat( format, false, true, false );
	D3DFORMAT actualFormat = ImageLoader::ImageFormatToD3DFormat( dstImageFormat );

	pTexture->GetRenderTargetSurface( false ) = g_TextureHeap.AllocRenderTargetSurface( width, height, actualFormat );
	pTexture->GetRenderTargetSurface( true ) = g_TextureHeap.AllocRenderTargetSurface( width, height, (D3DFORMAT)MAKESRGBFMT( actualFormat ) );

	pTexture->SetImageFormat( dstImageFormat );

	pTexture->m_UTexWrap = D3DTADDRESS_CLAMP;
	pTexture->m_VTexWrap = D3DTADDRESS_CLAMP;
	pTexture->m_WTexWrap = D3DTADDRESS_CLAMP;
	pTexture->m_MagFilter = D3DTEXF_LINEAR;

	pTexture->m_NumLevels = 1;
	pTexture->m_MipFilter = D3DTEXF_NONE;
	pTexture->m_MinFilter = D3DTEXF_LINEAR;

	pTexture->m_SwitchNeeded = false;
	
	ComputeStatsInfo( textureHandle, false, false );
	SetupTextureGroup( textureHandle, pTextureGroupName );

	return textureHandle;
}
#endif

//-----------------------------------------------------------------------------
// Shader constants are batched and written to gpu once prior to draw.
//-----------------------------------------------------------------------------
void CShaderAPIDx8::WriteShaderConstantsToGPU()
{	
#if defined( _X360 )
	// vector vertex constants can just blast their set range
	if ( m_MaxVectorVertexShaderConstant )
	{
		if ( m_bGPUOwned )
		{
			// faster path, write directly into GPU command buffer, bypassing shadow state
			// can only set what is actually owned
			Assert( m_MaxVectorVertexShaderConstant <= VERTEX_SHADER_MODEL + 3*NUM_MODEL_TRANSFORMS );
			int numVectors = AlignValue( m_MaxVectorVertexShaderConstant, 4 );
			BYTE* pCommandBufferData;
			Dx9Device()->GpuBeginVertexShaderConstantF4( 0, (D3DVECTOR4**)&pCommandBufferData, numVectors );
			memcpy( pCommandBufferData, m_DesiredState.m_pVectorVertexShaderConstant[0].Base(), numVectors * (sizeof( float ) * 4) );
			Dx9Device()->GpuEndVertexShaderConstantF4();
		}
		else
		{
			Dx9Device()->SetVertexShaderConstantF( 0, m_DesiredState.m_pVectorVertexShaderConstant[0].Base(), m_MaxVectorVertexShaderConstant );
		}

		memcpy( m_DynamicState.m_pVectorVertexShaderConstant[0].Base(), m_DesiredState.m_pVectorVertexShaderConstant[0].Base(), m_MaxVectorVertexShaderConstant * 4 * sizeof(float) );
		m_MaxVectorVertexShaderConstant = 0;
	}

	if ( m_MaxVectorPixelShaderConstant )
	{
		if ( m_bGPUOwned )
		{
			// faster path, write directly into GPU command buffer, bypassing shadow state
			// can only set what is actually owned
			Assert( m_MaxVectorPixelShaderConstant <= 32 );
			int numVectors = AlignValue( m_MaxVectorPixelShaderConstant, 4 );
			BYTE* pCommandBufferData;
			Dx9Device()->GpuBeginPixelShaderConstantF4( 0, (D3DVECTOR4**)&pCommandBufferData, numVectors );
			memcpy( pCommandBufferData, m_DesiredState.m_pVectorPixelShaderConstant[0].Base(), numVectors * (sizeof( float ) * 4) );
			Dx9Device()->GpuEndPixelShaderConstantF4();
		}
		else
		{
			Dx9Device()->SetPixelShaderConstantF( 0, m_DesiredState.m_pVectorPixelShaderConstant[0].Base(), m_MaxVectorPixelShaderConstant );
		}

		memcpy( m_DynamicState.m_pVectorPixelShaderConstant[0].Base(), m_DesiredState.m_pVectorPixelShaderConstant[0].Base(), m_MaxVectorPixelShaderConstant * 4 * sizeof(float) );
		m_MaxVectorPixelShaderConstant = 0;
	}

	// boolean and integer constants can just blast their set range
	// these are currently extremely small in number, if this changes they may benefit from a fast path pattern
	if ( m_MaxBooleanVertexShaderConstant )
	{
		Dx9Device()->SetVertexShaderConstantB( 0, m_DesiredState.m_pBooleanVertexShaderConstant, m_MaxBooleanVertexShaderConstant );
		memcpy( m_DynamicState.m_pBooleanVertexShaderConstant, m_DesiredState.m_pBooleanVertexShaderConstant, m_MaxBooleanVertexShaderConstant * sizeof(BOOL) );
		m_MaxBooleanVertexShaderConstant = 0;
	}
	if ( m_MaxIntegerVertexShaderConstant )
	{
		Dx9Device()->SetVertexShaderConstantI( 0, (int *)m_DesiredState.m_pIntegerVertexShaderConstant, m_MaxIntegerVertexShaderConstant );
		memcpy( m_DynamicState.m_pIntegerVertexShaderConstant[0].Base(), m_DesiredState.m_pIntegerVertexShaderConstant[0].Base(), m_MaxIntegerVertexShaderConstant * sizeof(IntVector4D) );
		m_MaxIntegerVertexShaderConstant = 0;
	}

	if ( m_MaxBooleanPixelShaderConstant )
	{
		Dx9Device()->SetPixelShaderConstantB( 0, m_DesiredState.m_pBooleanPixelShaderConstant, m_MaxBooleanPixelShaderConstant );
		memcpy( m_DynamicState.m_pBooleanPixelShaderConstant, m_DesiredState.m_pBooleanPixelShaderConstant, m_MaxBooleanPixelShaderConstant * sizeof(BOOL) );
		m_MaxBooleanPixelShaderConstant = 0;
	}

	// integer pixel constants are not used, so not supporting
#if 0
	if ( m_MaxIntegerPixelShaderConstant )
	{
		Dx9Device()->SetPixelShaderConstantI( 0, (int *)m_DesiredState.m_pIntegerPixelShaderConstant, m_MaxIntegerPixelShaderConstant );
		memcpy( m_DynamicState.m_pIntegerPixelShaderConstant[0].Base(), m_DesiredState.m_pIntegerPixelShaderConstant[0].Base(), m_MaxIntegerPixelShaderConstant * sizeof(IntVector4D) );
		m_MaxIntegerPixelShaderConstant = 0;
	}
#endif
#endif
}

//-----------------------------------------------------------------------------
// The application is about to perform a hard reboot, but wants to hide the screen flash
// by persisting the front buffer across a reboot boundary. The persisted frame buffer
// can be detected and restored.
//-----------------------------------------------------------------------------
#if defined( _X360 )
void CShaderAPIDx8::PersistDisplay()
{
	if ( m_PresentParameters.FrontBufferFormat != D3DFMT_LE_X8R8G8B8 )
	{
		// The format must be what PersistDisplay() expects, otherwise D3DRIP.
		// If this hits due to sRGB bit set that confuses PersistDisplay(),
		// the fix may be to slam the presentation parameters to the expected format,
		// do a ResetDevice(), and then PersistDisplay().
		Assert( 0 );
		return;
	}

	IDirect3DTexture *pTexture;
	HRESULT hr = Dx9Device()->GetFrontBuffer( &pTexture );
	if ( !FAILED( hr ) )
	{
		OwnGPUResources( false );
		Dx9Device()->PersistDisplay( pTexture, NULL );
		pTexture->Release();
	}
}
#endif

#if defined( _X360 )
bool CShaderAPIDx8::PostQueuedTexture( const void *pData, int nDataSize, ShaderAPITextureHandle_t *pHandles, int numHandles, int nWidth, int nHeight, int nDepth, int numMips, int *pRefCount )
{
	CUtlBuffer vtfBuffer;	
	IVTFTexture *pVTFTexture = NULL;
	bool bOK = false;
	
	if ( !pData || !nDataSize )
	{
		// invalid
		goto cleanUp;
	}

	// get a unique vtf and mount texture
	// vtf can expect non-volatile buffer data to be stable through vtf lifetime
	// this prevents redundant copious amounts of image memory transfers
	pVTFTexture = CreateVTFTexture();
	vtfBuffer.SetExternalBuffer( (void *)pData, nDataSize, nDataSize );	
	if ( !pVTFTexture->UnserializeFromBuffer( vtfBuffer, false, false, false, 0 ) )
	{
		goto cleanUp;
	}

	// provided vtf buffer is all mips, determine top mip due to possible picmip
	int iTopMip = 0;
	int mipWidth, mipHeight, mipDepth;
	do
	{
		pVTFTexture->ComputeMipLevelDimensions( iTopMip, &mipWidth, &mipHeight, &mipDepth );
		if ( nWidth == mipWidth && nHeight == mipHeight && nDepth == mipDepth )
		{
			break;
		}
		iTopMip++;
	} 
	while ( mipWidth != 1 || mipHeight != 1 || mipDepth != 1 );
	
	// create and blit
	for ( int iFrame = 0; iFrame < numHandles; iFrame++ )
	{
		ShaderAPITextureHandle_t hTexture = pHandles[iFrame];
		Texture_t *pTexture = &GetTexture( hTexture );

		int nFaceCount = ( pTexture->m_CreationFlags & TEXTURE_CREATE_CUBEMAP ) ? CUBEMAP_FACE_COUNT-1 : 1;

		IDirect3DBaseTexture *pD3DTexture;
		if ( pTexture->m_CreationFlags & TEXTURE_CREATE_NOD3DMEMORY )
		{
			pD3DTexture = pTexture->GetTexture();
			if ( !g_TextureHeap.AllocD3DMemory( pD3DTexture ) )
			{
				goto cleanUp;
			}
		}
		else
		{
			pD3DTexture = pTexture->GetTexture();
		}

		// blit the hi-res texture bits into d3d memory
		for ( int iFace = 0; iFace < nFaceCount; ++iFace )
		{
			for ( int iMip = 0; iMip < numMips; ++iMip )
			{
				pVTFTexture->ComputeMipLevelDimensions( iTopMip + iMip, &mipWidth, &mipHeight, &mipDepth );
				unsigned char *pSourceBits = pVTFTexture->ImageData( iFrame, iFace, iTopMip + iMip, 0, 0, 0 );

				TextureLoadInfo_t info;
				info.m_TextureHandle = hTexture;
				info.m_pTexture = pD3DTexture;
				info.m_nLevel = iMip;
				info.m_nCopy = 0;
				info.m_CubeFaceID = (D3DCUBEMAP_FACES)iFace;
				info.m_nWidth = mipWidth;
				info.m_nHeight = mipHeight;
				info.m_nZOffset = 0;
				info.m_SrcFormat = pVTFTexture->Format();
				info.m_pSrcData = pSourceBits;
				info.m_bSrcIsTiled = pVTFTexture->IsPreTiled();
				info.m_bCanConvertFormat = ( pTexture->m_Flags & Texture_t::CAN_CONVERT_FORMAT ) != 0;
				LoadTexture( info );
			}
		}

		pTexture->m_Flags |= Texture_t::IS_FINALIZED;
		(*pRefCount)--;
	}

	// success
	bOK = true;

cleanUp:
	if ( pVTFTexture )
	{
		DestroyVTFTexture( pVTFTexture );
	}

	if ( !bOK )
	{
		// undo artificial lock
		(*pRefCount) -= numHandles;
	}

	return bOK;
}	
#endif

#if defined( _X360 )
void *CShaderAPIDx8::GetD3DDevice()
{
	return Dx9Device();
}
#endif

#if defined( _X360 )
static void r_enable_gpr_allocations_callback( IConVar *var, const char *pOldValue, float flOldValue )
{
	if ( ((ConVar *)var)->GetBool() == false )
	{
		//reset back the default 64/64 allocation before we stop updating
		if( Dx9Device() != NULL )
		{
			Dx9Device()->SetShaderGPRAllocation( 0, 0, 0 );
		}
	}
}

ConVar r_enable_gpr_allocations( "r_enable_gpr_allocations", "1", 0, "Enable usage of IDirect3DDevice9::SetShaderGPRAllocation()", r_enable_gpr_allocations_callback );

static void CommitShaderGPRs( IDirect3DDevice9 *pDevice, const DynamicState_t &desiredState, DynamicState_t &currentState, bool bForce )
{
	if( desiredState.m_iVertexShaderGPRAllocation != currentState.m_iVertexShaderGPRAllocation )
	{
		pDevice->SetShaderGPRAllocation( 0, desiredState.m_iVertexShaderGPRAllocation, 128 - desiredState.m_iVertexShaderGPRAllocation );
		currentState.m_iVertexShaderGPRAllocation = desiredState.m_iVertexShaderGPRAllocation;
	}
}

void CShaderAPIDx8::PushVertexShaderGPRAllocation( int iVertexShaderCount )
{
	Assert( (iVertexShaderCount >= 16) && (iVertexShaderCount <= 112) );
	m_VertexShaderGPRAllocationStack.Push( iVertexShaderCount );

	if ( r_enable_gpr_allocations.GetBool() )
	{
		if ( m_DynamicState.m_iVertexShaderGPRAllocation != iVertexShaderCount )
		{
			m_DesiredState.m_iVertexShaderGPRAllocation = iVertexShaderCount;
			ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_ALWAYS, CommitShaderGPRs );
		}
	}
}

void CShaderAPIDx8::PopVertexShaderGPRAllocation( void )
{
	m_VertexShaderGPRAllocationStack.Pop();

	if ( r_enable_gpr_allocations.GetBool() )
	{
		int iVertexShaderCount;
		if ( m_VertexShaderGPRAllocationStack.Count() )
			iVertexShaderCount = m_VertexShaderGPRAllocationStack.Top();
		else
			iVertexShaderCount = 64;

		if ( m_DynamicState.m_iVertexShaderGPRAllocation != iVertexShaderCount )
		{
			m_DesiredState.m_iVertexShaderGPRAllocation = iVertexShaderCount;
			ADD_COMMIT_FUNC( COMMIT_PER_DRAW, COMMIT_ALWAYS, CommitShaderGPRs );
		}
	}
}

void CShaderAPIDx8::EnableVSync_360( bool bEnable )
{
	if( bEnable )
	{
		Dx9Device()->SetRenderState( D3DRS_PRESENTIMMEDIATETHRESHOLD, 0 ); //only swap on vertical blanks
	}
	else
	{
		Dx9Device()->SetRenderState( D3DRS_PRESENTIMMEDIATETHRESHOLD, 100 ); //allow a swap at any point in the DAC scan
	}
}
#endif

// ------------ New Vertex/Index Buffer interface ----------------------------

void CShaderAPIDx8::BindVertexBuffer( int streamID, IVertexBuffer *pVertexBuffer, int nOffsetInBytes, int nFirstVertex, int nVertexCount, VertexFormat_t fmt, int nRepetitions )
{
	LOCK_SHADERAPI();
	MeshMgr()->BindVertexBuffer( streamID, pVertexBuffer, nOffsetInBytes, nFirstVertex, nVertexCount, fmt, nRepetitions );
}

void CShaderAPIDx8::BindIndexBuffer( IIndexBuffer *pIndexBuffer, int nOffsetInBytes )
{
	LOCK_SHADERAPI();
	MeshMgr()->BindIndexBuffer( pIndexBuffer, nOffsetInBytes );
}

void CShaderAPIDx8::Draw( MaterialPrimitiveType_t primitiveType, int nFirstIndex, int nIndexCount )
{
	LOCK_SHADERAPI();
	MeshMgr()->Draw( primitiveType, nFirstIndex, nIndexCount );
}

// ------------ End ----------------------------

float CShaderAPIDx8::GammaToLinear_HardwareSpecific( float fGamma ) const
{
	if( IsPC() )
	{
		return SrgbGammaToLinear( fGamma );
	}
	else if( IsX360() )
	{
		return SrgbGammaToLinear( fGamma );
	}
	else
	{
		// Unknown console
		return pow( fGamma, 2.2f ); // Use a gamma 2.2 curve
	}
}

float CShaderAPIDx8::LinearToGamma_HardwareSpecific( float fLinear ) const
{
	if ( IsPC() )
	{
		return SrgbLinearToGamma( fLinear );
	}
	else if ( IsX360() )
	{
		return SrgbLinearToGamma( fLinear );
	}
	else
	{
		// Unknown console
		return pow( fLinear, ( 1.0f / 2.2f ) ); // Use a gamma 2.2 curve
	}
}


bool CShaderAPIDx8::ShouldWriteDepthToDestAlpha( void ) const
{
	return IsPC() && g_pHardwareConfig->SupportsPixelShaders_2_b() &&
			(m_SceneFogMode != MATERIAL_FOG_LINEAR_BELOW_FOG_Z) && 
			(GetIntRenderingParameter(INT_RENDERPARM_WRITE_DEPTH_TO_DESTALPHA) != 0);
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
void CShaderAPIDx8::AcquireThreadOwnership()
{
	SetCurrentThreadAsOwner();
#if (defined( _X360 ) || defined( DX_TO_GL_ABSTRACTION ))
	Dx9Device()->AcquireThreadOwnership();
#endif
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
void CShaderAPIDx8::ReleaseThreadOwnership()
{
	RemoveThreadOwner();
#if (defined( _X360 ) || defined( DX_TO_GL_ABSTRACTION ))
	Dx9Device()->ReleaseThreadOwnership();
#endif
}


//-----------------------------------------------------------------------------
// Actual low level setting of the color RT. All Xbox RT funnels here
// to track the actual RT state.  Returns true if the RT gets set, otherwise false.
//-----------------------------------------------------------------------------
bool CShaderAPIDx8::SetRenderTargetInternalXbox( ShaderAPITextureHandle_t hRenderTargetTexture, bool bForce )
{
	// valid for 360 only
	if ( IsPC() )
	{
		Assert( 0 );
		return false;
	}

	if ( hRenderTargetTexture == INVALID_SHADERAPI_TEXTURE_HANDLE )
	{
		// could be a reset, force to back buffer
		hRenderTargetTexture = SHADER_RENDERTARGET_BACKBUFFER;
	}

	if ( m_hCachedRenderTarget == INVALID_SHADERAPI_TEXTURE_HANDLE )
	{
		// let the set go through to establish the initial state
		bForce = true;
	}

	if ( !bForce && ( hRenderTargetTexture == m_hCachedRenderTarget && m_DynamicState.m_bSRGBWritesEnabled == m_bUsingSRGBRenderTarget ) )
	{
		// current RT matches expected state, leave state intact
		return false;
	}

	// track the updated state
	m_bUsingSRGBRenderTarget = m_DynamicState.m_bSRGBWritesEnabled;
	m_hCachedRenderTarget = hRenderTargetTexture;

	IDirect3DSurface *pSurface;
	if ( m_hCachedRenderTarget == SHADER_RENDERTARGET_BACKBUFFER )
	{
		if ( !m_bUsingSRGBRenderTarget )
		{
			pSurface = m_pBackBufferSurface;
		}
		else
		{
			pSurface = m_pBackBufferSurfaceSRGB;
		}
	}
	else
	{
		AssertValidTextureHandle( m_hCachedRenderTarget );
		Texture_t *pTexture = &GetTexture( m_hCachedRenderTarget );
		pSurface = pTexture->GetRenderTargetSurface( m_bUsingSRGBRenderTarget );
	}

	// the 360 does a wierd reset of some states on a SetRenderTarget()
	// the viewport is a clobbered state, it may not be changed by later callers, so it MUST be put back as expected
	// the other clobbered states are waiting to be discovered ... sigh
#if defined( _X360 )
	D3DVIEWPORT9 viewport;
	Dx9Device()->GetViewport( &viewport );
	Dx9Device()->SetRenderTarget( 0, pSurface );
	Dx9Device()->SetViewport( &viewport );
#endif

	return true;
}


//-----------------------------------------------------------------------------
// debug logging
//-----------------------------------------------------------------------------
void CShaderAPIDx8::PrintfVA( char *fmt, va_list vargs )
{
#ifdef DX_TO_GL_ABSTRACTION
	#if GLMDEBUG
		GLMPrintfVA( fmt, vargs );
	#endif
#else
	AssertOnce( !"Impl me" );
#endif
}

void CShaderAPIDx8::Printf( const char *fmt, ... )
{
#ifdef DX_TO_GL_ABSTRACTION
	#if GLMDEBUG
		va_list	vargs;

		va_start(vargs, fmt);

		GLMPrintfVA( fmt, vargs );

		va_end( vargs );
	#endif
#else
	AssertOnce( !"Impl me" );
#endif
}

float CShaderAPIDx8::Knob( char *knobname, float *setvalue )
{
#ifdef DX_TO_GL_ABSTRACTION
	#if GLMDEBUG
		return GLMKnob( knobname, setvalue );
	#else
		return 0.0f;
	#endif
#else
	return 0.0f;
#endif
}



#if defined( _X360 )

extern ConVar r_blocking_spew_threshold;
void D3DBlockingSpewCallback( DWORD Flags, D3DBLOCKTYPE BlockType, float ClockTime, DWORD ThreadTime )
{
	if( ClockTime >= r_blocking_spew_threshold.GetFloat() )
	{
		const char *pBlockType = "";
		switch( BlockType )
		{		
		case D3DBLOCKTYPE_NONE:
			pBlockType = "D3DBLOCKTYPE_NONE";
			break;
		case D3DBLOCKTYPE_PRIMARY_OVERRUN:
			pBlockType = "D3DBLOCKTYPE_PRIMARY_OVERRUN";
			break;
		case D3DBLOCKTYPE_SECONDARY_OVERRUN:
			pBlockType = "D3DBLOCKTYPE_SECONDARY_OVERRUN";
			break;
		case D3DBLOCKTYPE_SWAP_THROTTLE:
			pBlockType = "D3DBLOCKTYPE_SWAP_THROTTLE";
			break;
		case D3DBLOCKTYPE_BLOCK_UNTIL_IDLE:
			pBlockType = "D3DBLOCKTYPE_BLOCK_UNTIL_IDLE";
			break;
		case D3DBLOCKTYPE_BLOCK_UNTIL_NOT_BUSY:
			pBlockType = "D3DBLOCKTYPE_BLOCK_UNTIL_NOT_BUSY";
			break;
		case D3DBLOCKTYPE_BLOCK_ON_FENCE:
			pBlockType = "D3DBLOCKTYPE_BLOCK_ON_FENCE";
			break;
		case D3DBLOCKTYPE_VERTEX_SHADER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_VERTEX_SHADER_RELEASE";
			break;
		case D3DBLOCKTYPE_PIXEL_SHADER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_PIXEL_SHADER_RELEASE";
			break;
		case D3DBLOCKTYPE_VERTEX_BUFFER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_VERTEX_BUFFER_RELEASE";
			break;
		case D3DBLOCKTYPE_VERTEX_BUFFER_LOCK:
			pBlockType = "D3DBLOCKTYPE_VERTEX_BUFFER_LOCK";
			break;
		case D3DBLOCKTYPE_INDEX_BUFFER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_INDEX_BUFFER_RELEASE";
			break;
		case D3DBLOCKTYPE_INDEX_BUFFER_LOCK:
			pBlockType = "D3DBLOCKTYPE_INDEX_BUFFER_LOCK";
			break;
		case D3DBLOCKTYPE_TEXTURE_RELEASE:
			pBlockType = "D3DBLOCKTYPE_TEXTURE_RELEASE";
			break;
		case D3DBLOCKTYPE_TEXTURE_LOCK:
			pBlockType = "D3DBLOCKTYPE_TEXTURE_LOCK";
			break;
		case D3DBLOCKTYPE_COMMAND_BUFFER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_COMMAND_BUFFER_RELEASE";
			break;
		case D3DBLOCKTYPE_COMMAND_BUFFER_LOCK:
			pBlockType = "D3DBLOCKTYPE_COMMAND_BUFFER_LOCK";
			break;
		case D3DBLOCKTYPE_CONSTANT_BUFFER_RELEASE:
			pBlockType = "D3DBLOCKTYPE_CONSTANT_BUFFER_RELEASE";
			break;
		case D3DBLOCKTYPE_CONSTANT_BUFFER_LOCK:
			pBlockType = "D3DBLOCKTYPE_CONSTANT_BUFFER_LOCK";
			break;

		NO_DEFAULT;
		};

		Warning( "D3D Block: %s for %.2f ms\n", pBlockType, ClockTime );
	}
}

static void r_blocking_spew_threshold_callback( IConVar *var, const char *pOldValue, float flOldValue )
{
	if( Dx9Device() != NULL )
	{
		if ( ((ConVar *)var)->GetFloat() >= 0.0f )
		{
			Dx9Device()->SetBlockCallback( 0, D3DBlockingSpewCallback );
		}
		else
		{
			Dx9Device()->SetBlockCallback( 0, NULL );
		}
	}
}

ConVar r_blocking_spew_threshold( "r_blocking_spew_threshold", "-1", 0, "Enable spew of Direct3D Blocks. Specify the minimum blocking time in milliseconds before spewing a warning.", r_blocking_spew_threshold_callback );
#endif

